{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Copyright (c) 2020, NVIDIA CORPORATION.\n",
    "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
    "you may not use this file except in compliance with the License.\n",
    "You may obtain a copy of the License at\n",
    "    http://www.apache.org/licenses/LICENSE-2.0\n",
    "Unless required by applicable law or agreed to in writing, software\n",
    "distributed under the License is distributed on an \"AS IS\" BASIS,\n",
    "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
    "See the License for the specific language governing permissions and\n",
    "limitations under the License."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline  \n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import gc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>...</th>\n",
       "      <th>14</th>\n",
       "      <th>15</th>\n",
       "      <th>16</th>\n",
       "      <th>17</th>\n",
       "      <th>18</th>\n",
       "      <th>19</th>\n",
       "      <th>20</th>\n",
       "      <th>21</th>\n",
       "      <th>22</th>\n",
       "      <th>23</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>101\\t1942\\t18628\\t15752\\t4458\\t7697\\t24309\\t10...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>E7D6C5094767223F6F8789A87A1937AB</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581262691</td>\n",
       "      <td>D557B03872EF8986F7F4426AE094B2FE</td>\n",
       "      <td>...</td>\n",
       "      <td>00000776B07587ECA9717BFC301F2D6E</td>\n",
       "      <td>94</td>\n",
       "      <td>648</td>\n",
       "      <td>False</td>\n",
       "      <td>1478011810</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>101\\t56898\\t137\\t94836\\t101481\\t10245\\t11166\\t...</td>\n",
       "      <td>83D6C79F5FCEC8D1CAD9E82C2C261611\\tFFAD2DCF664C...</td>\n",
       "      <td>129F4A868712BA2B98D31AF98C3066E4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581497241</td>\n",
       "      <td>424822AC982CE0E8965506C63B44EC12</td>\n",
       "      <td>...</td>\n",
       "      <td>00000B85AAF7DE172876FD96718C4469</td>\n",
       "      <td>1139</td>\n",
       "      <td>46</td>\n",
       "      <td>False</td>\n",
       "      <td>1540395738</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581498e+09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581498e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>101\\t98377\\t22627\\t33499\\t25053\\t10898\\t3793\\t...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>04C6C2175852CDBBC23B2446C7E7C22D</td>\n",
       "      <td>NaN</td>\n",
       "      <td>DDFFB4C01DB85921C3580F614575AA6D</td>\n",
       "      <td>BE4539C53C53FFABCFD232DB100C792B</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1580978528</td>\n",
       "      <td>1EC14E26417AA926095530AC591BA9CE</td>\n",
       "      <td>...</td>\n",
       "      <td>00000E0C9B364891CDE89ECFC54771DE</td>\n",
       "      <td>780</td>\n",
       "      <td>440</td>\n",
       "      <td>False</td>\n",
       "      <td>1432084055</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581061e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>101\\t56898\\t137\\t11161\\t73421\\t131\\t137\\t11887...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>168157826315514C120494D4DF8E6216</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581321849</td>\n",
       "      <td>9B9595B6FEB8948BDDF0D222F27E0118</td>\n",
       "      <td>...</td>\n",
       "      <td>00000F04EEDBCF3E1FB9A1948BF353B6</td>\n",
       "      <td>1</td>\n",
       "      <td>45</td>\n",
       "      <td>False</td>\n",
       "      <td>1534313747</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581329e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>101\\t100\\t119\\t6694\\t6546\\t5621\\t2446\\t1975\\t2...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>B3E3673782A69D9D8A45D3B222F0B073</td>\n",
       "      <td>Photo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1580956787</td>\n",
       "      <td>525DC99B7CB8F1AC4AD3E66C53FA38E0</td>\n",
       "      <td>...</td>\n",
       "      <td>000010088197DA00D659853E06935B3E</td>\n",
       "      <td>171</td>\n",
       "      <td>388</td>\n",
       "      <td>False</td>\n",
       "      <td>1490166885</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.580958e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>121386426</th>\n",
       "      <td>101\\t100\\t100\\t13740\\t71933\\t10173\\t19188\\t143...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>259A1FEE97F75D075C7178FC90B14B95</td>\n",
       "      <td>NaN</td>\n",
       "      <td>DF9B50100C06319F269BC2D1496235F6</td>\n",
       "      <td>D151F7A4EAA7359747D0663582BB8AF7</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>06D61DCBBE938971E1EA0C38BD9B5446</td>\n",
       "      <td>1581526242</td>\n",
       "      <td>C584509CC54FD65E7A40A360AAD5C8AA</td>\n",
       "      <td>...</td>\n",
       "      <td>FFFFFC3F7D026D5564658D5FEDACD898</td>\n",
       "      <td>222</td>\n",
       "      <td>541</td>\n",
       "      <td>False</td>\n",
       "      <td>1482679314</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>121386427</th>\n",
       "      <td>101\\t56898\\t137\\t38571\\t11189\\t28128\\t11401\\t1...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3D2C3326D15E555D394A54153F40A0E4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>06D61DCBBE938971E1EA0C38BD9B5446</td>\n",
       "      <td>1581170319</td>\n",
       "      <td>480314680FC1947F25ECFCA9A61A4DD3</td>\n",
       "      <td>...</td>\n",
       "      <td>FFFFFC3F7D026D5564658D5FEDACD898</td>\n",
       "      <td>221</td>\n",
       "      <td>530</td>\n",
       "      <td>False</td>\n",
       "      <td>1482679314</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>121386428</th>\n",
       "      <td>101\\t18484\\t12111\\t67354\\t10112\\t10285\\t100\\t1...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>D54906CC20849EF90458DB2A37496700</td>\n",
       "      <td>Photo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>06D61DCBBE938971E1EA0C38BD9B5446</td>\n",
       "      <td>1581014731</td>\n",
       "      <td>21576CE4D3C626DC384DFE5871EC8E7A</td>\n",
       "      <td>...</td>\n",
       "      <td>FFFFFC3F7D026D5564658D5FEDACD898</td>\n",
       "      <td>222</td>\n",
       "      <td>541</td>\n",
       "      <td>False</td>\n",
       "      <td>1482679314</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581061e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>121386429</th>\n",
       "      <td>101\\t13000\\t19528\\t10301\\t169\\t108193\\t14724\\t...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>D6397D90D8FA2F20B090471B25C8B830</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581354842</td>\n",
       "      <td>AEBAC88A2B9ED78DE8D6A877CEF593C6</td>\n",
       "      <td>...</td>\n",
       "      <td>FFFFFD1055CAC7796CC27CB89FDB4B2F</td>\n",
       "      <td>403</td>\n",
       "      <td>451</td>\n",
       "      <td>False</td>\n",
       "      <td>1393130113</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>121386430</th>\n",
       "      <td>101\\t10072\\t4460\\t20019\\t2195\\t2149\\t2287\\t839...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3753AC589D514B50B9E02F287A6094B3</td>\n",
       "      <td>Photo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581360762</td>\n",
       "      <td>D1B4032B71C488B041BC6CA6D64B6BF2</td>\n",
       "      <td>...</td>\n",
       "      <td>FFFFFE97AA06327403491D71E2ED52DA</td>\n",
       "      <td>719</td>\n",
       "      <td>463</td>\n",
       "      <td>False</td>\n",
       "      <td>1501554925</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581371e+09</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>121386431 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                          0   \\\n",
       "0          101\\t1942\\t18628\\t15752\\t4458\\t7697\\t24309\\t10...   \n",
       "1          101\\t56898\\t137\\t94836\\t101481\\t10245\\t11166\\t...   \n",
       "2          101\\t98377\\t22627\\t33499\\t25053\\t10898\\t3793\\t...   \n",
       "3          101\\t56898\\t137\\t11161\\t73421\\t131\\t137\\t11887...   \n",
       "4          101\\t100\\t119\\t6694\\t6546\\t5621\\t2446\\t1975\\t2...   \n",
       "...                                                      ...   \n",
       "121386426  101\\t100\\t100\\t13740\\t71933\\t10173\\t19188\\t143...   \n",
       "121386427  101\\t56898\\t137\\t38571\\t11189\\t28128\\t11401\\t1...   \n",
       "121386428  101\\t18484\\t12111\\t67354\\t10112\\t10285\\t100\\t1...   \n",
       "121386429  101\\t13000\\t19528\\t10301\\t169\\t108193\\t14724\\t...   \n",
       "121386430  101\\t10072\\t4460\\t20019\\t2195\\t2149\\t2287\\t839...   \n",
       "\n",
       "                                                          1   \\\n",
       "0                                                        NaN   \n",
       "1          83D6C79F5FCEC8D1CAD9E82C2C261611\\tFFAD2DCF664C...   \n",
       "2                                                        NaN   \n",
       "3                                                        NaN   \n",
       "4                                                        NaN   \n",
       "...                                                      ...   \n",
       "121386426                                                NaN   \n",
       "121386427                                                NaN   \n",
       "121386428                                                NaN   \n",
       "121386429                                                NaN   \n",
       "121386430                                                NaN   \n",
       "\n",
       "                                         2      3   \\\n",
       "0          E7D6C5094767223F6F8789A87A1937AB    NaN   \n",
       "1          129F4A868712BA2B98D31AF98C3066E4    NaN   \n",
       "2          04C6C2175852CDBBC23B2446C7E7C22D    NaN   \n",
       "3          168157826315514C120494D4DF8E6216    NaN   \n",
       "4          B3E3673782A69D9D8A45D3B222F0B073  Photo   \n",
       "...                                     ...    ...   \n",
       "121386426  259A1FEE97F75D075C7178FC90B14B95    NaN   \n",
       "121386427  3D2C3326D15E555D394A54153F40A0E4    NaN   \n",
       "121386428  D54906CC20849EF90458DB2A37496700  Photo   \n",
       "121386429  D6397D90D8FA2F20B090471B25C8B830    NaN   \n",
       "121386430  3753AC589D514B50B9E02F287A6094B3  Photo   \n",
       "\n",
       "                                         4                                 5   \\\n",
       "0                                       NaN                               NaN   \n",
       "1                                       NaN                               NaN   \n",
       "2          DDFFB4C01DB85921C3580F614575AA6D  BE4539C53C53FFABCFD232DB100C792B   \n",
       "3                                       NaN                               NaN   \n",
       "4                                       NaN                               NaN   \n",
       "...                                     ...                               ...   \n",
       "121386426  DF9B50100C06319F269BC2D1496235F6  D151F7A4EAA7359747D0663582BB8AF7   \n",
       "121386427                               NaN                               NaN   \n",
       "121386428                               NaN                               NaN   \n",
       "121386429                               NaN                               NaN   \n",
       "121386430                               NaN                               NaN   \n",
       "\n",
       "                 6                                 7           8   \\\n",
       "0          TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1581262691   \n",
       "1           Retweet  22C448FF81263D4BAF2A176145EE9EAD  1581497241   \n",
       "2          TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1580978528   \n",
       "3           Retweet  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581321849   \n",
       "4          TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1580956787   \n",
       "...             ...                               ...         ...   \n",
       "121386426  TopLevel  06D61DCBBE938971E1EA0C38BD9B5446  1581526242   \n",
       "121386427   Retweet  06D61DCBBE938971E1EA0C38BD9B5446  1581170319   \n",
       "121386428  TopLevel  06D61DCBBE938971E1EA0C38BD9B5446  1581014731   \n",
       "121386429  TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581354842   \n",
       "121386430  TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1581360762   \n",
       "\n",
       "                                         9   ...  \\\n",
       "0          D557B03872EF8986F7F4426AE094B2FE  ...   \n",
       "1          424822AC982CE0E8965506C63B44EC12  ...   \n",
       "2          1EC14E26417AA926095530AC591BA9CE  ...   \n",
       "3          9B9595B6FEB8948BDDF0D222F27E0118  ...   \n",
       "4          525DC99B7CB8F1AC4AD3E66C53FA38E0  ...   \n",
       "...                                     ...  ...   \n",
       "121386426  C584509CC54FD65E7A40A360AAD5C8AA  ...   \n",
       "121386427  480314680FC1947F25ECFCA9A61A4DD3  ...   \n",
       "121386428  21576CE4D3C626DC384DFE5871EC8E7A  ...   \n",
       "121386429  AEBAC88A2B9ED78DE8D6A877CEF593C6  ...   \n",
       "121386430  D1B4032B71C488B041BC6CA6D64B6BF2  ...   \n",
       "\n",
       "                                         14    15   16     17          18  \\\n",
       "0          00000776B07587ECA9717BFC301F2D6E    94  648  False  1478011810   \n",
       "1          00000B85AAF7DE172876FD96718C4469  1139   46  False  1540395738   \n",
       "2          00000E0C9B364891CDE89ECFC54771DE   780  440  False  1432084055   \n",
       "3          00000F04EEDBCF3E1FB9A1948BF353B6     1   45  False  1534313747   \n",
       "4          000010088197DA00D659853E06935B3E   171  388  False  1490166885   \n",
       "...                                     ...   ...  ...    ...         ...   \n",
       "121386426  FFFFFC3F7D026D5564658D5FEDACD898   222  541  False  1482679314   \n",
       "121386427  FFFFFC3F7D026D5564658D5FEDACD898   221  530  False  1482679314   \n",
       "121386428  FFFFFC3F7D026D5564658D5FEDACD898   222  541  False  1482679314   \n",
       "121386429  FFFFFD1055CAC7796CC27CB89FDB4B2F   403  451  False  1393130113   \n",
       "121386430  FFFFFE97AA06327403491D71E2ED52DA   719  463  False  1501554925   \n",
       "\n",
       "              19  20            21  22            23  \n",
       "0          False NaN           NaN NaN           NaN  \n",
       "1           True NaN  1.581498e+09 NaN  1.581498e+09  \n",
       "2           True NaN           NaN NaN  1.581061e+09  \n",
       "3          False NaN           NaN NaN  1.581329e+09  \n",
       "4          False NaN           NaN NaN  1.580958e+09  \n",
       "...          ...  ..           ...  ..           ...  \n",
       "121386426   True NaN           NaN NaN           NaN  \n",
       "121386427  False NaN           NaN NaN           NaN  \n",
       "121386428  False NaN           NaN NaN  1.581061e+09  \n",
       "121386429  False NaN           NaN NaN           NaN  \n",
       "121386430   True NaN           NaN NaN  1.581371e+09  \n",
       "\n",
       "[121386431 rows x 24 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Compact dtypes for the numeric columns of the raw, header-less training file.\n",
    "# Because the file is read with header=None, pandas labels the columns with the\n",
    "# integers 0..N-1, so the keys here must be ints. String keys such as '8' match\n",
    "# no column label, which leaves the requested dtypes unapplied.\n",
    "DTYPES = {\n",
    "    8: 'int32',\n",
    "    10: 'int32',\n",
    "    11: 'int32',\n",
    "    13: 'int32',\n",
    "    15: 'int32',\n",
    "    16: 'int32',\n",
    "    18: 'int32',\n",
    "    # NOTE(review): columns 20-23 contain NaN, so they need a float dtype. float32\n",
    "    # cannot hold epoch-second values of this size exactly (spacing is 128 near\n",
    "    # 1.58e9); switch these to 'float64' if exact engagement times are needed.\n",
    "    20: 'float32',\n",
    "    21: 'float32',\n",
    "    22: 'float32',\n",
    "    23: 'float32',\n",
    "}\n",
    "\n",
    "# Fields in the file are separated by the \\x01 control character.\n",
    "df = pd.read_csv('../input/training.tsv', sep='\\x01', header=None, dtype=DTYPES)\n",
    "gc.collect()\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(121386431, 23)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>hashtags</th>\n",
       "      <th>tweet_id</th>\n",
       "      <th>media</th>\n",
       "      <th>links</th>\n",
       "      <th>domains</th>\n",
       "      <th>tweet_type</th>\n",
       "      <th>language</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>a_user_id</th>\n",
       "      <th>a_follower_count</th>\n",
       "      <th>...</th>\n",
       "      <th>b_user_id</th>\n",
       "      <th>b_follower_count</th>\n",
       "      <th>b_following_count</th>\n",
       "      <th>b_is_verified</th>\n",
       "      <th>b_account_creation</th>\n",
       "      <th>b_follows_a</th>\n",
       "      <th>reply</th>\n",
       "      <th>retweet</th>\n",
       "      <th>retweet_comment</th>\n",
       "      <th>like</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>E7D6C5094767223F6F8789A87A1937AB</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581262691</td>\n",
       "      <td>D557B03872EF8986F7F4426AE094B2FE</td>\n",
       "      <td>986</td>\n",
       "      <td>...</td>\n",
       "      <td>00000776B07587ECA9717BFC301F2D6E</td>\n",
       "      <td>94</td>\n",
       "      <td>648</td>\n",
       "      <td>False</td>\n",
       "      <td>1478011810</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>83D6C79F5FCEC8D1CAD9E82C2C261611\\tFFAD2DCF664C...</td>\n",
       "      <td>129F4A868712BA2B98D31AF98C3066E4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581497241</td>\n",
       "      <td>424822AC982CE0E8965506C63B44EC12</td>\n",
       "      <td>1225</td>\n",
       "      <td>...</td>\n",
       "      <td>00000B85AAF7DE172876FD96718C4469</td>\n",
       "      <td>1139</td>\n",
       "      <td>46</td>\n",
       "      <td>False</td>\n",
       "      <td>1540395738</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581498e+09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581498e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>04C6C2175852CDBBC23B2446C7E7C22D</td>\n",
       "      <td>NaN</td>\n",
       "      <td>DDFFB4C01DB85921C3580F614575AA6D</td>\n",
       "      <td>BE4539C53C53FFABCFD232DB100C792B</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1580978528</td>\n",
       "      <td>1EC14E26417AA926095530AC591BA9CE</td>\n",
       "      <td>3016</td>\n",
       "      <td>...</td>\n",
       "      <td>00000E0C9B364891CDE89ECFC54771DE</td>\n",
       "      <td>780</td>\n",
       "      <td>440</td>\n",
       "      <td>False</td>\n",
       "      <td>1432084055</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581061e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>168157826315514C120494D4DF8E6216</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581321849</td>\n",
       "      <td>9B9595B6FEB8948BDDF0D222F27E0118</td>\n",
       "      <td>2121</td>\n",
       "      <td>...</td>\n",
       "      <td>00000F04EEDBCF3E1FB9A1948BF353B6</td>\n",
       "      <td>1</td>\n",
       "      <td>45</td>\n",
       "      <td>False</td>\n",
       "      <td>1534313747</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581329e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>B3E3673782A69D9D8A45D3B222F0B073</td>\n",
       "      <td>Photo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1580956787</td>\n",
       "      <td>525DC99B7CB8F1AC4AD3E66C53FA38E0</td>\n",
       "      <td>813505</td>\n",
       "      <td>...</td>\n",
       "      <td>000010088197DA00D659853E06935B3E</td>\n",
       "      <td>171</td>\n",
       "      <td>388</td>\n",
       "      <td>False</td>\n",
       "      <td>1490166885</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.580958e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>D20331F40622336B266EC2BF3572F7E5</td>\n",
       "      <td>AB21A06B694D637075F1EA4F89A05197</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>167115458A0DBDFF7E9C0C53A83BAC9B</td>\n",
       "      <td>1581341389</td>\n",
       "      <td>7E1E2FAD93219D0247BDBE451AB343E9</td>\n",
       "      <td>47678</td>\n",
       "      <td>...</td>\n",
       "      <td>000012A6D58B300B1B4098C86223F76E</td>\n",
       "      <td>1927</td>\n",
       "      <td>1414</td>\n",
       "      <td>False</td>\n",
       "      <td>1368483885</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581347e+09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581347e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>NaN</td>\n",
       "      <td>166C053A658691172A7A3CB20D8FB614</td>\n",
       "      <td>Photo\\tPhoto\\tPhoto</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>ECED8A16BE2A5E8871FD55F4842F16B1</td>\n",
       "      <td>1581004518</td>\n",
       "      <td>A0FD6DF4B4FBF62949708CDB97CC8124</td>\n",
       "      <td>29358</td>\n",
       "      <td>...</td>\n",
       "      <td>000012D4971A83624EF9C6711AE5167D</td>\n",
       "      <td>929</td>\n",
       "      <td>928</td>\n",
       "      <td>False</td>\n",
       "      <td>1577292324</td>\n",
       "      <td>True</td>\n",
       "      <td>1.581009e+09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>NaN</td>\n",
       "      <td>C6016D70FDDAF88BB64B00600B48F788</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Quote</td>\n",
       "      <td>ECED8A16BE2A5E8871FD55F4842F16B1</td>\n",
       "      <td>1581020186</td>\n",
       "      <td>629A622B84E4C67FAB56DCF0DBD785AA</td>\n",
       "      <td>43097</td>\n",
       "      <td>...</td>\n",
       "      <td>000013E6563760E3916215D42BE0D406</td>\n",
       "      <td>286</td>\n",
       "      <td>524</td>\n",
       "      <td>False</td>\n",
       "      <td>1439811227</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>NaN</td>\n",
       "      <td>293740496A195D5B20DBE00C3AEFFF17</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581005860</td>\n",
       "      <td>AD86A376FA5F26E67263D5FCA8A5BD59</td>\n",
       "      <td>279</td>\n",
       "      <td>...</td>\n",
       "      <td>00001607209C5774DF9207A2AC0EED5F</td>\n",
       "      <td>461</td>\n",
       "      <td>697</td>\n",
       "      <td>False</td>\n",
       "      <td>1396311956</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581009e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>NaN</td>\n",
       "      <td>3D89E8BE2E330DA8DD754D58EA07E824</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581103914</td>\n",
       "      <td>2F236A7D11ECDAF1FC472E9ACC32AE6B</td>\n",
       "      <td>2287014</td>\n",
       "      <td>...</td>\n",
       "      <td>0000170273D2530A0DF580401CC32AE0</td>\n",
       "      <td>42</td>\n",
       "      <td>118</td>\n",
       "      <td>False</td>\n",
       "      <td>1295601797</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>D545FDCEEE2C82E21EBBE23505173FA0</td>\n",
       "      <td>311EDE393CEBB5E880F5B3A96A69AA94</td>\n",
       "      <td>Photo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>4DC22C3F31C5C43721E6B5815A595ED6</td>\n",
       "      <td>1581187934</td>\n",
       "      <td>85A39142470D65A77BB9E86B054AD321</td>\n",
       "      <td>19401</td>\n",
       "      <td>...</td>\n",
       "      <td>00001F56CDCF81D2EF635B3C0EDE57EB</td>\n",
       "      <td>51</td>\n",
       "      <td>161</td>\n",
       "      <td>False</td>\n",
       "      <td>1397500466</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581190e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>NaN</td>\n",
       "      <td>B9C4540F4798A61C1F17CEB3AE369735</td>\n",
       "      <td>Photo\\tPhoto</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1580959419</td>\n",
       "      <td>0C89F01A8644F9B685BF2225F00EF34B</td>\n",
       "      <td>832</td>\n",
       "      <td>...</td>\n",
       "      <td>00002086C1D5C05ADE95E1C60FAF71FD</td>\n",
       "      <td>22</td>\n",
       "      <td>251</td>\n",
       "      <td>False</td>\n",
       "      <td>1439637842</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>40A23C9DE38F5B42FDABD7DE6B73AC6E</td>\n",
       "      <td>EC2126D5DB025A6C66BC24A5596EC475</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Quote</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581043204</td>\n",
       "      <td>46097861C196B25F2C56606CD32AE14C</td>\n",
       "      <td>159570</td>\n",
       "      <td>...</td>\n",
       "      <td>00002086C1D5C05ADE95E1C60FAF71FD</td>\n",
       "      <td>22</td>\n",
       "      <td>251</td>\n",
       "      <td>False</td>\n",
       "      <td>1439637842</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581045e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>NaN</td>\n",
       "      <td>10932CB9E641857BACF9A49D267A8E98</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5E671E810416A48B15C2C03D66306322</td>\n",
       "      <td>3330516EAADFD093A1C7B9DA83172DEB</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581362695</td>\n",
       "      <td>618642C3684BC6B65F905C289D1F376F</td>\n",
       "      <td>1350529</td>\n",
       "      <td>...</td>\n",
       "      <td>000024E52825D248DDAB9884DC0BD758</td>\n",
       "      <td>205</td>\n",
       "      <td>86</td>\n",
       "      <td>False</td>\n",
       "      <td>1402661478</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581375e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>C321B459658CB455C53F2DE7C235A706\\t34A45F5ED2E4...</td>\n",
       "      <td>86E049967C94CF82BA9EB17A2EE4F3CE</td>\n",
       "      <td>Photo\\tPhoto\\tPhoto</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581062422</td>\n",
       "      <td>49D04A274A357E3039E1DF0F78E2975F</td>\n",
       "      <td>82353</td>\n",
       "      <td>...</td>\n",
       "      <td>000025CDC48B25D9888C6640DE433FE9</td>\n",
       "      <td>96</td>\n",
       "      <td>167</td>\n",
       "      <td>False</td>\n",
       "      <td>1562685797</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581064e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>NaN</td>\n",
       "      <td>222EF495AA79DCCC3F32AB9754E9F173</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Quote</td>\n",
       "      <td>ECED8A16BE2A5E8871FD55F4842F16B1</td>\n",
       "      <td>1581197081</td>\n",
       "      <td>162984295C1BCFF55D7977B9ADA50AD8</td>\n",
       "      <td>49949</td>\n",
       "      <td>...</td>\n",
       "      <td>000026C296F4693A6196F90ABAF80FB6</td>\n",
       "      <td>17</td>\n",
       "      <td>370</td>\n",
       "      <td>False</td>\n",
       "      <td>1465619079</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>56402281D4D3E8F951CB23AC700A8F5C</td>\n",
       "      <td>9CF609C0D9D099F7C09B52976F5029F7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581017422</td>\n",
       "      <td>A702303ED1A25C6DD1393BC0CCABF94A</td>\n",
       "      <td>231537</td>\n",
       "      <td>...</td>\n",
       "      <td>0000288C66B7563CB98736F96894D9DC</td>\n",
       "      <td>187</td>\n",
       "      <td>397</td>\n",
       "      <td>False</td>\n",
       "      <td>1577798166</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581018e+09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581018e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>805C7A95714ADBA122FF5F55C9C9C8E4\\t6372CE4C7D66...</td>\n",
       "      <td>6601197539548AAD5C9CB9FCD162A5C7</td>\n",
       "      <td>Photo\\tPhoto</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581521058</td>\n",
       "      <td>0FACF6DB63422FC388A2BB4AA1585AB2</td>\n",
       "      <td>4624448</td>\n",
       "      <td>...</td>\n",
       "      <td>00002C99ACC8931540C190542549BFE0</td>\n",
       "      <td>32</td>\n",
       "      <td>110</td>\n",
       "      <td>False</td>\n",
       "      <td>1279606330</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>D8B2515734BFAA8902070F3A9C4F6BCD</td>\n",
       "      <td>B72C9FD782AF79BA56755410E3C617FC</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>06D61DCBBE938971E1EA0C38BD9B5446</td>\n",
       "      <td>1581081184</td>\n",
       "      <td>0D01AD1116E22830152CC5FC78CE952D</td>\n",
       "      <td>1873</td>\n",
       "      <td>...</td>\n",
       "      <td>000030DA986805A0B204966360B8AABB</td>\n",
       "      <td>143</td>\n",
       "      <td>119</td>\n",
       "      <td>False</td>\n",
       "      <td>1559584136</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>NaN</td>\n",
       "      <td>C33F498C0F3FE07667B08B834A2FF474</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>06D61DCBBE938971E1EA0C38BD9B5446</td>\n",
       "      <td>1581260062</td>\n",
       "      <td>5B8C4D92F1AE859A2FD69BB405B03347</td>\n",
       "      <td>314</td>\n",
       "      <td>...</td>\n",
       "      <td>00003B622698D49D3868B01E557FE4E6</td>\n",
       "      <td>457</td>\n",
       "      <td>430</td>\n",
       "      <td>False</td>\n",
       "      <td>1381246972</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581260e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>NaN</td>\n",
       "      <td>35359883026EF6D9A31F6C962517A15D</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581368704</td>\n",
       "      <td>418D921524B4A26261D5F24586CCEDD7</td>\n",
       "      <td>43765</td>\n",
       "      <td>...</td>\n",
       "      <td>000043D9A730DF47697D0750F509B56A</td>\n",
       "      <td>871</td>\n",
       "      <td>927</td>\n",
       "      <td>False</td>\n",
       "      <td>1359042454</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581369e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>NaN</td>\n",
       "      <td>C95D525FFA8A789ABBA005B9F7BE7174</td>\n",
       "      <td>NaN</td>\n",
       "      <td>AEDE23492BD06619BF93EEB39E83B5E4</td>\n",
       "      <td>8E1AE5ECC4EFDC42F77501ED9AA002ED</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1580950322</td>\n",
       "      <td>9586F04FF09A614BAE30367575053EC4</td>\n",
       "      <td>3418</td>\n",
       "      <td>...</td>\n",
       "      <td>000043D9A730DF47697D0750F509B56A</td>\n",
       "      <td>871</td>\n",
       "      <td>927</td>\n",
       "      <td>False</td>\n",
       "      <td>1359042454</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.580963e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>NaN</td>\n",
       "      <td>3C21DCFB8E3FEC1CB3D2BFB413A78220</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>76B8A9C3013AE6414A3E6012413CDC3B</td>\n",
       "      <td>1581467323</td>\n",
       "      <td>D1AA2C85FA644D64346EDD88470525F2</td>\n",
       "      <td>737</td>\n",
       "      <td>...</td>\n",
       "      <td>000046C8606F1C3F5A7296222C88084B</td>\n",
       "      <td>131</td>\n",
       "      <td>2105</td>\n",
       "      <td>False</td>\n",
       "      <td>1573978269</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>F9B5807EDB6B215DF96862159A6B4892</td>\n",
       "      <td>C83B3441704D4E6172B86CEF3F66845C</td>\n",
       "      <td>Photo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Quote</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581147754</td>\n",
       "      <td>5611A5DB21B6C40C8E3848A4DECFBB24</td>\n",
       "      <td>77584</td>\n",
       "      <td>...</td>\n",
       "      <td>00004DF6D6CAB6361EDF8FDE86365ECE</td>\n",
       "      <td>59</td>\n",
       "      <td>194</td>\n",
       "      <td>False</td>\n",
       "      <td>1352843123</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>NaN</td>\n",
       "      <td>42E7832DAB2B068F63B32B6AD85B3F9D</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581226105</td>\n",
       "      <td>24F43670B7EF0441CCF800C6B77403BC</td>\n",
       "      <td>735882</td>\n",
       "      <td>...</td>\n",
       "      <td>0000510EC3AFC6F9FB8E411852A84877</td>\n",
       "      <td>222</td>\n",
       "      <td>205</td>\n",
       "      <td>False</td>\n",
       "      <td>1431360312</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>NaN</td>\n",
       "      <td>C281F4EF6011EFC61BDE3E96216C0A6A</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>A0C7021AD8299ADF0C9EBE326C115F6F</td>\n",
       "      <td>1581182880</td>\n",
       "      <td>1216DCF3AF02393B3139C17C28DDDE02</td>\n",
       "      <td>358</td>\n",
       "      <td>...</td>\n",
       "      <td>0000510EC3AFC6F9FB8E411852A84877</td>\n",
       "      <td>222</td>\n",
       "      <td>205</td>\n",
       "      <td>False</td>\n",
       "      <td>1431360312</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581191e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>NaN</td>\n",
       "      <td>C7FB1313DB6D1013282343FFA2AF41FD</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581540621</td>\n",
       "      <td>E4B09E5CE7BC5D9FB753CDFCF63ACCC0</td>\n",
       "      <td>471015</td>\n",
       "      <td>...</td>\n",
       "      <td>0000581864A04C34E289F984EBD20562</td>\n",
       "      <td>895</td>\n",
       "      <td>426</td>\n",
       "      <td>False</td>\n",
       "      <td>1271603280</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581548e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>57BDABF6E201A2D0B6C7652379FABEEE</td>\n",
       "      <td>28D9CAE78D1F3E9877A9559757D57293</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1580954292</td>\n",
       "      <td>5A6DCA175E07A222854B7F115C50A3B8</td>\n",
       "      <td>832</td>\n",
       "      <td>...</td>\n",
       "      <td>00005B5734CD2CB88CBD541ADEBA0F4F</td>\n",
       "      <td>247</td>\n",
       "      <td>391</td>\n",
       "      <td>False</td>\n",
       "      <td>1292752814</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.580956e+09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.580956e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>57BDABF6E201A2D0B6C7652379FABEEE</td>\n",
       "      <td>558CAF543304741E670682E4F6CDA1CC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581129521</td>\n",
       "      <td>8397BF026AF4CD632EBB10441B55A22F</td>\n",
       "      <td>830</td>\n",
       "      <td>...</td>\n",
       "      <td>00005B5734CD2CB88CBD541ADEBA0F4F</td>\n",
       "      <td>248</td>\n",
       "      <td>391</td>\n",
       "      <td>False</td>\n",
       "      <td>1292752814</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>NaN</td>\n",
       "      <td>D2F0691D4B7D3933824640F6AED9D308</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>125C57F4FA6D4E110983FB11B52EFD4E</td>\n",
       "      <td>1581219559</td>\n",
       "      <td>758F32F4069006B21E15EA41BC9EBCEA</td>\n",
       "      <td>3079</td>\n",
       "      <td>...</td>\n",
       "      <td>00005BD9676C7C12A80E686070A180BD</td>\n",
       "      <td>135</td>\n",
       "      <td>216</td>\n",
       "      <td>False</td>\n",
       "      <td>1566636367</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581220e+09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>NaN</td>\n",
       "      <td>8AB541DCDB76A5DB2B36A51653D94185</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581370024</td>\n",
       "      <td>398EB2EBB16F9F19AF6F6E736EA3CAB6</td>\n",
       "      <td>300</td>\n",
       "      <td>...</td>\n",
       "      <td>00005FFBE9E33A6EAD7B5A70C1084271</td>\n",
       "      <td>282</td>\n",
       "      <td>283</td>\n",
       "      <td>False</td>\n",
       "      <td>1481197266</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581371e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>NaN</td>\n",
       "      <td>3D87CC3655C276F1771752081423B405</td>\n",
       "      <td>NaN</td>\n",
       "      <td>BB422AA00380E45F312FD2CAA75F4960</td>\n",
       "      <td>92D397F8E0F1E77B36B8C612C2C51E23</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1580975391</td>\n",
       "      <td>4DC65AC7BD963DE1F7617C047C33DE99</td>\n",
       "      <td>52366425</td>\n",
       "      <td>...</td>\n",
       "      <td>00006047187D0D18598EF12A650E1DAC</td>\n",
       "      <td>22</td>\n",
       "      <td>50</td>\n",
       "      <td>False</td>\n",
       "      <td>1340673962</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>DB32BD91C2F1B37BE700F374A07FBC61</td>\n",
       "      <td>3701848B96AA740528A2B0E247777D7D</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2423BA02A75DB2189335DDC3FB6B74A1</td>\n",
       "      <td>6D323BE93766E79BE423FAC5C28BE39B</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581257232</td>\n",
       "      <td>5C671539CB41B9807E209349B101E9FF</td>\n",
       "      <td>988</td>\n",
       "      <td>...</td>\n",
       "      <td>0000648BAA193AE4C625DDF789B57172</td>\n",
       "      <td>251</td>\n",
       "      <td>719</td>\n",
       "      <td>False</td>\n",
       "      <td>1456473671</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>NaN</td>\n",
       "      <td>6A17A38E3ECDA343C2B147F653C8750E</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>FA3F382BC409C271E3D6EAF8BE4648DD</td>\n",
       "      <td>1581420958</td>\n",
       "      <td>2B4E3136FD6C06BF75052DFDF20CF1DE</td>\n",
       "      <td>2822</td>\n",
       "      <td>...</td>\n",
       "      <td>000067AEB5C52781117CADDCB811D2A0</td>\n",
       "      <td>257</td>\n",
       "      <td>240</td>\n",
       "      <td>False</td>\n",
       "      <td>1509932123</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>EA152CAF6EE7894E914C522BD1B5E702</td>\n",
       "      <td>B8DE8D8CD04864D008EE21081034A9A7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581313083</td>\n",
       "      <td>4A91410143FAC6824BFCADC928251106</td>\n",
       "      <td>12072</td>\n",
       "      <td>...</td>\n",
       "      <td>000070F30341E475432BAAA845117D6C</td>\n",
       "      <td>20</td>\n",
       "      <td>55</td>\n",
       "      <td>False</td>\n",
       "      <td>1573941072</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581314e+09</td>\n",
       "      <td>1.581314e+09</td>\n",
       "      <td>1.581314e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>NaN</td>\n",
       "      <td>18176C6AD2871729384062F073CCE94D</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581164292</td>\n",
       "      <td>70B900BE17416923D1E236A38798F202</td>\n",
       "      <td>1228134</td>\n",
       "      <td>...</td>\n",
       "      <td>000071667F50BAFEA722A8E8284581E5</td>\n",
       "      <td>18</td>\n",
       "      <td>58</td>\n",
       "      <td>False</td>\n",
       "      <td>1378427564</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581305e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>NaN</td>\n",
       "      <td>D42E2D62B7F6B940E6282495A444DB4E</td>\n",
       "      <td>Photo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>ECED8A16BE2A5E8871FD55F4842F16B1</td>\n",
       "      <td>1581197829</td>\n",
       "      <td>C9A9E1DE113C10893F85C4F3C6DBED1C</td>\n",
       "      <td>192</td>\n",
       "      <td>...</td>\n",
       "      <td>00007227679DC76C1C32802F0A2FDCD0</td>\n",
       "      <td>94</td>\n",
       "      <td>130</td>\n",
       "      <td>False</td>\n",
       "      <td>1538249361</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581232e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>NaN</td>\n",
       "      <td>10C2BF422C4605FF55FE23B8C92A2973</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>06D61DCBBE938971E1EA0C38BD9B5446</td>\n",
       "      <td>1581311592</td>\n",
       "      <td>CE89F7F296A0F73D55E2CC0922AAF80D</td>\n",
       "      <td>535</td>\n",
       "      <td>...</td>\n",
       "      <td>0000761240F60756083A27D1BA42EF85</td>\n",
       "      <td>308</td>\n",
       "      <td>368</td>\n",
       "      <td>False</td>\n",
       "      <td>1436762988</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>NaN</td>\n",
       "      <td>AF11AF01F842E7F120667B7B0B38676D</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Quote</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581233650</td>\n",
       "      <td>E94C0E9E8494F3D603F9D1A5C5242E3D</td>\n",
       "      <td>73</td>\n",
       "      <td>...</td>\n",
       "      <td>00007745A6EE969F1A0F44B10DC17671</td>\n",
       "      <td>268</td>\n",
       "      <td>526</td>\n",
       "      <td>False</td>\n",
       "      <td>1252294800</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>12CC826EEC2C231DF77F499DCFDDE7DD</td>\n",
       "      <td>4A8129618E3D06342F5BB3A0987E61F0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>B9175601E87101A984A50F8A62A1C374</td>\n",
       "      <td>1581269998</td>\n",
       "      <td>1F2AAAFD6E725A4652D9A63D9C35997C</td>\n",
       "      <td>735</td>\n",
       "      <td>...</td>\n",
       "      <td>00007C6F44F143C51F142F109DC84E3F</td>\n",
       "      <td>538</td>\n",
       "      <td>1538</td>\n",
       "      <td>False</td>\n",
       "      <td>1483700135</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.581270e+09</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>40 rows × 23 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                             hashtags  \\\n",
       "0                                                 NaN   \n",
       "1   83D6C79F5FCEC8D1CAD9E82C2C261611\\tFFAD2DCF664C...   \n",
       "2                                                 NaN   \n",
       "3                                                 NaN   \n",
       "4                                                 NaN   \n",
       "5                    D20331F40622336B266EC2BF3572F7E5   \n",
       "6                                                 NaN   \n",
       "7                                                 NaN   \n",
       "8                                                 NaN   \n",
       "9                                                 NaN   \n",
       "10                   D545FDCEEE2C82E21EBBE23505173FA0   \n",
       "11                                                NaN   \n",
       "12                   40A23C9DE38F5B42FDABD7DE6B73AC6E   \n",
       "13                                                NaN   \n",
       "14  C321B459658CB455C53F2DE7C235A706\\t34A45F5ED2E4...   \n",
       "15                                                NaN   \n",
       "16                   56402281D4D3E8F951CB23AC700A8F5C   \n",
       "17  805C7A95714ADBA122FF5F55C9C9C8E4\\t6372CE4C7D66...   \n",
       "18                   D8B2515734BFAA8902070F3A9C4F6BCD   \n",
       "19                                                NaN   \n",
       "20                                                NaN   \n",
       "21                                                NaN   \n",
       "22                                                NaN   \n",
       "23                   F9B5807EDB6B215DF96862159A6B4892   \n",
       "24                                                NaN   \n",
       "25                                                NaN   \n",
       "26                                                NaN   \n",
       "27                   57BDABF6E201A2D0B6C7652379FABEEE   \n",
       "28                   57BDABF6E201A2D0B6C7652379FABEEE   \n",
       "29                                                NaN   \n",
       "30                                                NaN   \n",
       "31                                                NaN   \n",
       "32                   DB32BD91C2F1B37BE700F374A07FBC61   \n",
       "33                                                NaN   \n",
       "34                   EA152CAF6EE7894E914C522BD1B5E702   \n",
       "35                                                NaN   \n",
       "36                                                NaN   \n",
       "37                                                NaN   \n",
       "38                                                NaN   \n",
       "39                   12CC826EEC2C231DF77F499DCFDDE7DD   \n",
       "\n",
       "                            tweet_id                media  \\\n",
       "0   E7D6C5094767223F6F8789A87A1937AB                  NaN   \n",
       "1   129F4A868712BA2B98D31AF98C3066E4                  NaN   \n",
       "2   04C6C2175852CDBBC23B2446C7E7C22D                  NaN   \n",
       "3   168157826315514C120494D4DF8E6216                  NaN   \n",
       "4   B3E3673782A69D9D8A45D3B222F0B073                Photo   \n",
       "5   AB21A06B694D637075F1EA4F89A05197                Video   \n",
       "6   166C053A658691172A7A3CB20D8FB614  Photo\\tPhoto\\tPhoto   \n",
       "7   C6016D70FDDAF88BB64B00600B48F788                  NaN   \n",
       "8   293740496A195D5B20DBE00C3AEFFF17                Video   \n",
       "9   3D89E8BE2E330DA8DD754D58EA07E824                Video   \n",
       "10  311EDE393CEBB5E880F5B3A96A69AA94                Photo   \n",
       "11  B9C4540F4798A61C1F17CEB3AE369735         Photo\\tPhoto   \n",
       "12  EC2126D5DB025A6C66BC24A5596EC475                  NaN   \n",
       "13  10932CB9E641857BACF9A49D267A8E98                  NaN   \n",
       "14  86E049967C94CF82BA9EB17A2EE4F3CE  Photo\\tPhoto\\tPhoto   \n",
       "15  222EF495AA79DCCC3F32AB9754E9F173                  NaN   \n",
       "16  9CF609C0D9D099F7C09B52976F5029F7                  NaN   \n",
       "17  6601197539548AAD5C9CB9FCD162A5C7         Photo\\tPhoto   \n",
       "18  B72C9FD782AF79BA56755410E3C617FC                Video   \n",
       "19  C33F498C0F3FE07667B08B834A2FF474                Video   \n",
       "20  35359883026EF6D9A31F6C962517A15D                  NaN   \n",
       "21  C95D525FFA8A789ABBA005B9F7BE7174                  NaN   \n",
       "22  3C21DCFB8E3FEC1CB3D2BFB413A78220                Video   \n",
       "23  C83B3441704D4E6172B86CEF3F66845C                Photo   \n",
       "24  42E7832DAB2B068F63B32B6AD85B3F9D                  NaN   \n",
       "25  C281F4EF6011EFC61BDE3E96216C0A6A                  NaN   \n",
       "26  C7FB1313DB6D1013282343FFA2AF41FD                Video   \n",
       "27  28D9CAE78D1F3E9877A9559757D57293                  NaN   \n",
       "28  558CAF543304741E670682E4F6CDA1CC                  NaN   \n",
       "29  D2F0691D4B7D3933824640F6AED9D308                  NaN   \n",
       "30  8AB541DCDB76A5DB2B36A51653D94185                  NaN   \n",
       "31  3D87CC3655C276F1771752081423B405                  NaN   \n",
       "32  3701848B96AA740528A2B0E247777D7D                  NaN   \n",
       "33  6A17A38E3ECDA343C2B147F653C8750E                  NaN   \n",
       "34  B8DE8D8CD04864D008EE21081034A9A7                  NaN   \n",
       "35  18176C6AD2871729384062F073CCE94D                Video   \n",
       "36  D42E2D62B7F6B940E6282495A444DB4E                Photo   \n",
       "37  10C2BF422C4605FF55FE23B8C92A2973                  NaN   \n",
       "38  AF11AF01F842E7F120667B7B0B38676D                  NaN   \n",
       "39  4A8129618E3D06342F5BB3A0987E61F0                  NaN   \n",
       "\n",
       "                               links                           domains  \\\n",
       "0                                NaN                               NaN   \n",
       "1                                NaN                               NaN   \n",
       "2   DDFFB4C01DB85921C3580F614575AA6D  BE4539C53C53FFABCFD232DB100C792B   \n",
       "3                                NaN                               NaN   \n",
       "4                                NaN                               NaN   \n",
       "5                                NaN                               NaN   \n",
       "6                                NaN                               NaN   \n",
       "7                                NaN                               NaN   \n",
       "8                                NaN                               NaN   \n",
       "9                                NaN                               NaN   \n",
       "10                               NaN                               NaN   \n",
       "11                               NaN                               NaN   \n",
       "12                               NaN                               NaN   \n",
       "13  5E671E810416A48B15C2C03D66306322  3330516EAADFD093A1C7B9DA83172DEB   \n",
       "14                               NaN                               NaN   \n",
       "15                               NaN                               NaN   \n",
       "16                               NaN                               NaN   \n",
       "17                               NaN                               NaN   \n",
       "18                               NaN                               NaN   \n",
       "19                               NaN                               NaN   \n",
       "20                               NaN                               NaN   \n",
       "21  AEDE23492BD06619BF93EEB39E83B5E4  8E1AE5ECC4EFDC42F77501ED9AA002ED   \n",
       "22                               NaN                               NaN   \n",
       "23                               NaN                               NaN   \n",
       "24                               NaN                               NaN   \n",
       "25                               NaN                               NaN   \n",
       "26                               NaN                               NaN   \n",
       "27                               NaN                               NaN   \n",
       "28                               NaN                               NaN   \n",
       "29                               NaN                               NaN   \n",
       "30                               NaN                               NaN   \n",
       "31  BB422AA00380E45F312FD2CAA75F4960  92D397F8E0F1E77B36B8C612C2C51E23   \n",
       "32  2423BA02A75DB2189335DDC3FB6B74A1  6D323BE93766E79BE423FAC5C28BE39B   \n",
       "33                               NaN                               NaN   \n",
       "34                               NaN                               NaN   \n",
       "35                               NaN                               NaN   \n",
       "36                               NaN                               NaN   \n",
       "37                               NaN                               NaN   \n",
       "38                               NaN                               NaN   \n",
       "39                               NaN                               NaN   \n",
       "\n",
       "   tweet_type                          language   timestamp  \\\n",
       "0    TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1581262691   \n",
       "1     Retweet  22C448FF81263D4BAF2A176145EE9EAD  1581497241   \n",
       "2    TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1580978528   \n",
       "3     Retweet  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581321849   \n",
       "4    TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1580956787   \n",
       "5    TopLevel  167115458A0DBDFF7E9C0C53A83BAC9B  1581341389   \n",
       "6    TopLevel  ECED8A16BE2A5E8871FD55F4842F16B1  1581004518   \n",
       "7       Quote  ECED8A16BE2A5E8871FD55F4842F16B1  1581020186   \n",
       "8     Retweet  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581005860   \n",
       "9    TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581103914   \n",
       "10   TopLevel  4DC22C3F31C5C43721E6B5815A595ED6  1581187934   \n",
       "11   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1580959419   \n",
       "12      Quote  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581043204   \n",
       "13   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581362695   \n",
       "14   TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1581062422   \n",
       "15      Quote  ECED8A16BE2A5E8871FD55F4842F16B1  1581197081   \n",
       "16   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581017422   \n",
       "17   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581521058   \n",
       "18    Retweet  06D61DCBBE938971E1EA0C38BD9B5446  1581081184   \n",
       "19    Retweet  06D61DCBBE938971E1EA0C38BD9B5446  1581260062   \n",
       "20   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581368704   \n",
       "21   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1580950322   \n",
       "22    Retweet  76B8A9C3013AE6414A3E6012413CDC3B  1581467323   \n",
       "23      Quote  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581147754   \n",
       "24    Retweet  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581226105   \n",
       "25    Retweet  A0C7021AD8299ADF0C9EBE326C115F6F  1581182880   \n",
       "26   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581540621   \n",
       "27   TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1580954292   \n",
       "28   TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1581129521   \n",
       "29    Retweet  125C57F4FA6D4E110983FB11B52EFD4E  1581219559   \n",
       "30   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581370024   \n",
       "31   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1580975391   \n",
       "32    Retweet  22C448FF81263D4BAF2A176145EE9EAD  1581257232   \n",
       "33    Retweet  FA3F382BC409C271E3D6EAF8BE4648DD  1581420958   \n",
       "34    Retweet  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581313083   \n",
       "35   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581164292   \n",
       "36   TopLevel  ECED8A16BE2A5E8871FD55F4842F16B1  1581197829   \n",
       "37    Retweet  06D61DCBBE938971E1EA0C38BD9B5446  1581311592   \n",
       "38      Quote  22C448FF81263D4BAF2A176145EE9EAD  1581233650   \n",
       "39    Retweet  B9175601E87101A984A50F8A62A1C374  1581269998   \n",
       "\n",
       "                           a_user_id  a_follower_count  ...  \\\n",
       "0   D557B03872EF8986F7F4426AE094B2FE               986  ...   \n",
       "1   424822AC982CE0E8965506C63B44EC12              1225  ...   \n",
       "2   1EC14E26417AA926095530AC591BA9CE              3016  ...   \n",
       "3   9B9595B6FEB8948BDDF0D222F27E0118              2121  ...   \n",
       "4   525DC99B7CB8F1AC4AD3E66C53FA38E0            813505  ...   \n",
       "5   7E1E2FAD93219D0247BDBE451AB343E9             47678  ...   \n",
       "6   A0FD6DF4B4FBF62949708CDB97CC8124             29358  ...   \n",
       "7   629A622B84E4C67FAB56DCF0DBD785AA             43097  ...   \n",
       "8   AD86A376FA5F26E67263D5FCA8A5BD59               279  ...   \n",
       "9   2F236A7D11ECDAF1FC472E9ACC32AE6B           2287014  ...   \n",
       "10  85A39142470D65A77BB9E86B054AD321             19401  ...   \n",
       "11  0C89F01A8644F9B685BF2225F00EF34B               832  ...   \n",
       "12  46097861C196B25F2C56606CD32AE14C            159570  ...   \n",
       "13  618642C3684BC6B65F905C289D1F376F           1350529  ...   \n",
       "14  49D04A274A357E3039E1DF0F78E2975F             82353  ...   \n",
       "15  162984295C1BCFF55D7977B9ADA50AD8             49949  ...   \n",
       "16  A702303ED1A25C6DD1393BC0CCABF94A            231537  ...   \n",
       "17  0FACF6DB63422FC388A2BB4AA1585AB2           4624448  ...   \n",
       "18  0D01AD1116E22830152CC5FC78CE952D              1873  ...   \n",
       "19  5B8C4D92F1AE859A2FD69BB405B03347               314  ...   \n",
       "20  418D921524B4A26261D5F24586CCEDD7             43765  ...   \n",
       "21  9586F04FF09A614BAE30367575053EC4              3418  ...   \n",
       "22  D1AA2C85FA644D64346EDD88470525F2               737  ...   \n",
       "23  5611A5DB21B6C40C8E3848A4DECFBB24             77584  ...   \n",
       "24  24F43670B7EF0441CCF800C6B77403BC            735882  ...   \n",
       "25  1216DCF3AF02393B3139C17C28DDDE02               358  ...   \n",
       "26  E4B09E5CE7BC5D9FB753CDFCF63ACCC0            471015  ...   \n",
       "27  5A6DCA175E07A222854B7F115C50A3B8               832  ...   \n",
       "28  8397BF026AF4CD632EBB10441B55A22F               830  ...   \n",
       "29  758F32F4069006B21E15EA41BC9EBCEA              3079  ...   \n",
       "30  398EB2EBB16F9F19AF6F6E736EA3CAB6               300  ...   \n",
       "31  4DC65AC7BD963DE1F7617C047C33DE99          52366425  ...   \n",
       "32  5C671539CB41B9807E209349B101E9FF               988  ...   \n",
       "33  2B4E3136FD6C06BF75052DFDF20CF1DE              2822  ...   \n",
       "34  4A91410143FAC6824BFCADC928251106             12072  ...   \n",
       "35  70B900BE17416923D1E236A38798F202           1228134  ...   \n",
       "36  C9A9E1DE113C10893F85C4F3C6DBED1C               192  ...   \n",
       "37  CE89F7F296A0F73D55E2CC0922AAF80D               535  ...   \n",
       "38  E94C0E9E8494F3D603F9D1A5C5242E3D                73  ...   \n",
       "39  1F2AAAFD6E725A4652D9A63D9C35997C               735  ...   \n",
       "\n",
       "                           b_user_id  b_follower_count  b_following_count  \\\n",
       "0   00000776B07587ECA9717BFC301F2D6E                94                648   \n",
       "1   00000B85AAF7DE172876FD96718C4469              1139                 46   \n",
       "2   00000E0C9B364891CDE89ECFC54771DE               780                440   \n",
       "3   00000F04EEDBCF3E1FB9A1948BF353B6                 1                 45   \n",
       "4   000010088197DA00D659853E06935B3E               171                388   \n",
       "5   000012A6D58B300B1B4098C86223F76E              1927               1414   \n",
       "6   000012D4971A83624EF9C6711AE5167D               929                928   \n",
       "7   000013E6563760E3916215D42BE0D406               286                524   \n",
       "8   00001607209C5774DF9207A2AC0EED5F               461                697   \n",
       "9   0000170273D2530A0DF580401CC32AE0                42                118   \n",
       "10  00001F56CDCF81D2EF635B3C0EDE57EB                51                161   \n",
       "11  00002086C1D5C05ADE95E1C60FAF71FD                22                251   \n",
       "12  00002086C1D5C05ADE95E1C60FAF71FD                22                251   \n",
       "13  000024E52825D248DDAB9884DC0BD758               205                 86   \n",
       "14  000025CDC48B25D9888C6640DE433FE9                96                167   \n",
       "15  000026C296F4693A6196F90ABAF80FB6                17                370   \n",
       "16  0000288C66B7563CB98736F96894D9DC               187                397   \n",
       "17  00002C99ACC8931540C190542549BFE0                32                110   \n",
       "18  000030DA986805A0B204966360B8AABB               143                119   \n",
       "19  00003B622698D49D3868B01E557FE4E6               457                430   \n",
       "20  000043D9A730DF47697D0750F509B56A               871                927   \n",
       "21  000043D9A730DF47697D0750F509B56A               871                927   \n",
       "22  000046C8606F1C3F5A7296222C88084B               131               2105   \n",
       "23  00004DF6D6CAB6361EDF8FDE86365ECE                59                194   \n",
       "24  0000510EC3AFC6F9FB8E411852A84877               222                205   \n",
       "25  0000510EC3AFC6F9FB8E411852A84877               222                205   \n",
       "26  0000581864A04C34E289F984EBD20562               895                426   \n",
       "27  00005B5734CD2CB88CBD541ADEBA0F4F               247                391   \n",
       "28  00005B5734CD2CB88CBD541ADEBA0F4F               248                391   \n",
       "29  00005BD9676C7C12A80E686070A180BD               135                216   \n",
       "30  00005FFBE9E33A6EAD7B5A70C1084271               282                283   \n",
       "31  00006047187D0D18598EF12A650E1DAC                22                 50   \n",
       "32  0000648BAA193AE4C625DDF789B57172               251                719   \n",
       "33  000067AEB5C52781117CADDCB811D2A0               257                240   \n",
       "34  000070F30341E475432BAAA845117D6C                20                 55   \n",
       "35  000071667F50BAFEA722A8E8284581E5                18                 58   \n",
       "36  00007227679DC76C1C32802F0A2FDCD0                94                130   \n",
       "37  0000761240F60756083A27D1BA42EF85               308                368   \n",
       "38  00007745A6EE969F1A0F44B10DC17671               268                526   \n",
       "39  00007C6F44F143C51F142F109DC84E3F               538               1538   \n",
       "\n",
       "   b_is_verified  b_account_creation  b_follows_a         reply       retweet  \\\n",
       "0          False          1478011810        False           NaN           NaN   \n",
       "1          False          1540395738         True           NaN  1.581498e+09   \n",
       "2          False          1432084055         True           NaN           NaN   \n",
       "3          False          1534313747        False           NaN           NaN   \n",
       "4          False          1490166885        False           NaN           NaN   \n",
       "5          False          1368483885         True           NaN  1.581347e+09   \n",
       "6          False          1577292324         True  1.581009e+09           NaN   \n",
       "7          False          1439811227         True           NaN           NaN   \n",
       "8          False          1396311956         True           NaN           NaN   \n",
       "9          False          1295601797        False           NaN           NaN   \n",
       "10         False          1397500466        False           NaN           NaN   \n",
       "11         False          1439637842         True           NaN           NaN   \n",
       "12         False          1439637842        False           NaN           NaN   \n",
       "13         False          1402661478        False           NaN           NaN   \n",
       "14         False          1562685797        False           NaN           NaN   \n",
       "15         False          1465619079         True           NaN           NaN   \n",
       "16         False          1577798166        False           NaN  1.581018e+09   \n",
       "17         False          1279606330        False           NaN           NaN   \n",
       "18         False          1559584136         True           NaN           NaN   \n",
       "19         False          1381246972         True           NaN           NaN   \n",
       "20         False          1359042454         True           NaN           NaN   \n",
       "21         False          1359042454         True           NaN           NaN   \n",
       "22         False          1573978269        False           NaN           NaN   \n",
       "23         False          1352843123        False           NaN           NaN   \n",
       "24         False          1431360312        False           NaN           NaN   \n",
       "25         False          1431360312        False           NaN           NaN   \n",
       "26         False          1271603280        False           NaN           NaN   \n",
       "27         False          1292752814         True           NaN  1.580956e+09   \n",
       "28         False          1292752814         True           NaN           NaN   \n",
       "29         False          1566636367         True           NaN  1.581220e+09   \n",
       "30         False          1481197266         True           NaN           NaN   \n",
       "31         False          1340673962        False           NaN           NaN   \n",
       "32         False          1456473671        False           NaN           NaN   \n",
       "33         False          1509932123        False           NaN           NaN   \n",
       "34         False          1573941072        False           NaN  1.581314e+09   \n",
       "35         False          1378427564        False           NaN           NaN   \n",
       "36         False          1538249361         True           NaN           NaN   \n",
       "37         False          1436762988         True           NaN           NaN   \n",
       "38         False          1252294800        False           NaN           NaN   \n",
       "39         False          1483700135        False           NaN           NaN   \n",
       "\n",
       "    retweet_comment          like  \n",
       "0               NaN           NaN  \n",
       "1               NaN  1.581498e+09  \n",
       "2               NaN  1.581061e+09  \n",
       "3               NaN  1.581329e+09  \n",
       "4               NaN  1.580958e+09  \n",
       "5               NaN  1.581347e+09  \n",
       "6               NaN           NaN  \n",
       "7               NaN           NaN  \n",
       "8               NaN  1.581009e+09  \n",
       "9               NaN           NaN  \n",
       "10              NaN  1.581190e+09  \n",
       "11              NaN           NaN  \n",
       "12              NaN  1.581045e+09  \n",
       "13              NaN  1.581375e+09  \n",
       "14              NaN  1.581064e+09  \n",
       "15              NaN           NaN  \n",
       "16              NaN  1.581018e+09  \n",
       "17              NaN           NaN  \n",
       "18              NaN           NaN  \n",
       "19              NaN  1.581260e+09  \n",
       "20              NaN  1.581369e+09  \n",
       "21              NaN  1.580963e+09  \n",
       "22              NaN           NaN  \n",
       "23              NaN           NaN  \n",
       "24              NaN           NaN  \n",
       "25              NaN  1.581191e+09  \n",
       "26              NaN  1.581548e+09  \n",
       "27              NaN  1.580956e+09  \n",
       "28              NaN           NaN  \n",
       "29              NaN           NaN  \n",
       "30              NaN  1.581371e+09  \n",
       "31              NaN           NaN  \n",
       "32              NaN           NaN  \n",
       "33              NaN           NaN  \n",
       "34     1.581314e+09  1.581314e+09  \n",
       "35              NaN  1.581305e+09  \n",
       "36              NaN  1.581232e+09  \n",
       "37              NaN           NaN  \n",
       "38              NaN           NaN  \n",
       "39              NaN  1.581270e+09  \n",
       "\n",
       "[40 rows x 23 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Column names for the raw frame, in file order (24 names).\n",
    "features = [\n",
    "    # --- Tweet features ---\n",
    "    'text_tokens',\n",
    "    'hashtags',\n",
    "    'tweet_id',\n",
    "    'media',\n",
    "    'links',\n",
    "    'domains',\n",
    "    'tweet_type',\n",
    "    'language',\n",
    "    'timestamp',\n",
    "    # --- Engaged-with user features (a_*) ---\n",
    "    'a_user_id',\n",
    "    'a_follower_count',\n",
    "    'a_following_count',\n",
    "    'a_is_verified',\n",
    "    'a_account_creation',\n",
    "    # --- Engaging user features (b_*) ---\n",
    "    'b_user_id',\n",
    "    'b_follower_count',\n",
    "    'b_following_count',\n",
    "    'b_is_verified',\n",
    "    'b_account_creation',\n",
    "    # --- Engagement features ---\n",
    "    'b_follows_a',\n",
    "    'reply',            # target: reply\n",
    "    'retweet',          # target: retweet\n",
    "    'retweet_comment',  # target: retweet with comment\n",
    "    'like',             # target: like\n",
    "]\n",
    "\n",
    "# Relabel and drop only while the frame still carries its raw positional\n",
    "# labels. This keeps the cell idempotent: once 'text_tokens' has been dropped,\n",
    "# re-running an unconditional `df.columns = features` would either raise a\n",
    "# length mismatch or, if a later cell has added one column, silently shift\n",
    "# every label by one and then drop the wrong data.\n",
    "if 'tweet_id' not in df.columns:\n",
    "    df.columns = features\n",
    "    df = df.drop(columns='text_tokens')\n",
    "\n",
    "# Run a garbage-collection pass now that the old frame is no longer referenced.\n",
    "gc.collect()\n",
    "\n",
    "print(df.shape)\n",
    "df.head(40)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>hashtags</th>\n",
       "      <th>tweet_id</th>\n",
       "      <th>media</th>\n",
       "      <th>links</th>\n",
       "      <th>domains</th>\n",
       "      <th>tweet_type</th>\n",
       "      <th>language</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>a_user_id</th>\n",
       "      <th>a_follower_count</th>\n",
       "      <th>...</th>\n",
       "      <th>b_follower_count</th>\n",
       "      <th>b_following_count</th>\n",
       "      <th>b_is_verified</th>\n",
       "      <th>b_account_creation</th>\n",
       "      <th>b_follows_a</th>\n",
       "      <th>reply</th>\n",
       "      <th>retweet</th>\n",
       "      <th>retweet_comment</th>\n",
       "      <th>like</th>\n",
       "      <th>id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>E7D6C5094767223F6F8789A87A1937AB</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581262691</td>\n",
       "      <td>D557B03872EF8986F7F4426AE094B2FE</td>\n",
       "      <td>986</td>\n",
       "      <td>...</td>\n",
       "      <td>94</td>\n",
       "      <td>648</td>\n",
       "      <td>False</td>\n",
       "      <td>1478011810</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>83D6C79F5FCEC8D1CAD9E82C2C261611\\tFFAD2DCF664C...</td>\n",
       "      <td>129F4A868712BA2B98D31AF98C3066E4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581497241</td>\n",
       "      <td>424822AC982CE0E8965506C63B44EC12</td>\n",
       "      <td>1225</td>\n",
       "      <td>...</td>\n",
       "      <td>1139</td>\n",
       "      <td>46</td>\n",
       "      <td>False</td>\n",
       "      <td>1540395738</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>1581497559</td>\n",
       "      <td>0</td>\n",
       "      <td>1581497622</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>04C6C2175852CDBBC23B2446C7E7C22D</td>\n",
       "      <td>NaN</td>\n",
       "      <td>DDFFB4C01DB85921C3580F614575AA6D</td>\n",
       "      <td>BE4539C53C53FFABCFD232DB100C792B</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1580978528</td>\n",
       "      <td>1EC14E26417AA926095530AC591BA9CE</td>\n",
       "      <td>3016</td>\n",
       "      <td>...</td>\n",
       "      <td>780</td>\n",
       "      <td>440</td>\n",
       "      <td>False</td>\n",
       "      <td>1432084055</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1581060554</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>168157826315514C120494D4DF8E6216</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581321849</td>\n",
       "      <td>9B9595B6FEB8948BDDF0D222F27E0118</td>\n",
       "      <td>2121</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>45</td>\n",
       "      <td>False</td>\n",
       "      <td>1534313747</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1581328518</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>B3E3673782A69D9D8A45D3B222F0B073</td>\n",
       "      <td>Photo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1580956787</td>\n",
       "      <td>525DC99B7CB8F1AC4AD3E66C53FA38E0</td>\n",
       "      <td>813505</td>\n",
       "      <td>...</td>\n",
       "      <td>171</td>\n",
       "      <td>388</td>\n",
       "      <td>False</td>\n",
       "      <td>1490166885</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1580957807</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>D20331F40622336B266EC2BF3572F7E5</td>\n",
       "      <td>AB21A06B694D637075F1EA4F89A05197</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>167115458A0DBDFF7E9C0C53A83BAC9B</td>\n",
       "      <td>1581341389</td>\n",
       "      <td>7E1E2FAD93219D0247BDBE451AB343E9</td>\n",
       "      <td>47678</td>\n",
       "      <td>...</td>\n",
       "      <td>1927</td>\n",
       "      <td>1414</td>\n",
       "      <td>False</td>\n",
       "      <td>1368483885</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>1581346588</td>\n",
       "      <td>0</td>\n",
       "      <td>1581346588</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>NaN</td>\n",
       "      <td>166C053A658691172A7A3CB20D8FB614</td>\n",
       "      <td>Photo\\tPhoto\\tPhoto</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>ECED8A16BE2A5E8871FD55F4842F16B1</td>\n",
       "      <td>1581004518</td>\n",
       "      <td>A0FD6DF4B4FBF62949708CDB97CC8124</td>\n",
       "      <td>29358</td>\n",
       "      <td>...</td>\n",
       "      <td>929</td>\n",
       "      <td>928</td>\n",
       "      <td>False</td>\n",
       "      <td>1577292324</td>\n",
       "      <td>True</td>\n",
       "      <td>1581008849</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>NaN</td>\n",
       "      <td>C6016D70FDDAF88BB64B00600B48F788</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Quote</td>\n",
       "      <td>ECED8A16BE2A5E8871FD55F4842F16B1</td>\n",
       "      <td>1581020186</td>\n",
       "      <td>629A622B84E4C67FAB56DCF0DBD785AA</td>\n",
       "      <td>43097</td>\n",
       "      <td>...</td>\n",
       "      <td>286</td>\n",
       "      <td>524</td>\n",
       "      <td>False</td>\n",
       "      <td>1439811227</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>NaN</td>\n",
       "      <td>293740496A195D5B20DBE00C3AEFFF17</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581005860</td>\n",
       "      <td>AD86A376FA5F26E67263D5FCA8A5BD59</td>\n",
       "      <td>279</td>\n",
       "      <td>...</td>\n",
       "      <td>461</td>\n",
       "      <td>697</td>\n",
       "      <td>False</td>\n",
       "      <td>1396311956</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1581009248</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>NaN</td>\n",
       "      <td>3D89E8BE2E330DA8DD754D58EA07E824</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581103914</td>\n",
       "      <td>2F236A7D11ECDAF1FC472E9ACC32AE6B</td>\n",
       "      <td>2287014</td>\n",
       "      <td>...</td>\n",
       "      <td>42</td>\n",
       "      <td>118</td>\n",
       "      <td>False</td>\n",
       "      <td>1295601797</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>D545FDCEEE2C82E21EBBE23505173FA0</td>\n",
       "      <td>311EDE393CEBB5E880F5B3A96A69AA94</td>\n",
       "      <td>Photo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>4DC22C3F31C5C43721E6B5815A595ED6</td>\n",
       "      <td>1581187934</td>\n",
       "      <td>85A39142470D65A77BB9E86B054AD321</td>\n",
       "      <td>19401</td>\n",
       "      <td>...</td>\n",
       "      <td>51</td>\n",
       "      <td>161</td>\n",
       "      <td>False</td>\n",
       "      <td>1397500466</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1581189873</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>NaN</td>\n",
       "      <td>B9C4540F4798A61C1F17CEB3AE369735</td>\n",
       "      <td>Photo\\tPhoto</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1580959419</td>\n",
       "      <td>0C89F01A8644F9B685BF2225F00EF34B</td>\n",
       "      <td>832</td>\n",
       "      <td>...</td>\n",
       "      <td>22</td>\n",
       "      <td>251</td>\n",
       "      <td>False</td>\n",
       "      <td>1439637842</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>40A23C9DE38F5B42FDABD7DE6B73AC6E</td>\n",
       "      <td>EC2126D5DB025A6C66BC24A5596EC475</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Quote</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581043204</td>\n",
       "      <td>46097861C196B25F2C56606CD32AE14C</td>\n",
       "      <td>159570</td>\n",
       "      <td>...</td>\n",
       "      <td>22</td>\n",
       "      <td>251</td>\n",
       "      <td>False</td>\n",
       "      <td>1439637842</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1581045318</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>NaN</td>\n",
       "      <td>10932CB9E641857BACF9A49D267A8E98</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5E671E810416A48B15C2C03D66306322</td>\n",
       "      <td>3330516EAADFD093A1C7B9DA83172DEB</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581362695</td>\n",
       "      <td>618642C3684BC6B65F905C289D1F376F</td>\n",
       "      <td>1350529</td>\n",
       "      <td>...</td>\n",
       "      <td>205</td>\n",
       "      <td>86</td>\n",
       "      <td>False</td>\n",
       "      <td>1402661478</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1581375276</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>C321B459658CB455C53F2DE7C235A706\\t34A45F5ED2E4...</td>\n",
       "      <td>86E049967C94CF82BA9EB17A2EE4F3CE</td>\n",
       "      <td>Photo\\tPhoto\\tPhoto</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581062422</td>\n",
       "      <td>49D04A274A357E3039E1DF0F78E2975F</td>\n",
       "      <td>82353</td>\n",
       "      <td>...</td>\n",
       "      <td>96</td>\n",
       "      <td>167</td>\n",
       "      <td>False</td>\n",
       "      <td>1562685797</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1581063697</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>NaN</td>\n",
       "      <td>222EF495AA79DCCC3F32AB9754E9F173</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Quote</td>\n",
       "      <td>ECED8A16BE2A5E8871FD55F4842F16B1</td>\n",
       "      <td>1581197081</td>\n",
       "      <td>162984295C1BCFF55D7977B9ADA50AD8</td>\n",
       "      <td>49949</td>\n",
       "      <td>...</td>\n",
       "      <td>17</td>\n",
       "      <td>370</td>\n",
       "      <td>False</td>\n",
       "      <td>1465619079</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>56402281D4D3E8F951CB23AC700A8F5C</td>\n",
       "      <td>9CF609C0D9D099F7C09B52976F5029F7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581017422</td>\n",
       "      <td>A702303ED1A25C6DD1393BC0CCABF94A</td>\n",
       "      <td>231537</td>\n",
       "      <td>...</td>\n",
       "      <td>187</td>\n",
       "      <td>397</td>\n",
       "      <td>False</td>\n",
       "      <td>1577798166</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>1581017998</td>\n",
       "      <td>0</td>\n",
       "      <td>1581017998</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>805C7A95714ADBA122FF5F55C9C9C8E4\\t6372CE4C7D66...</td>\n",
       "      <td>6601197539548AAD5C9CB9FCD162A5C7</td>\n",
       "      <td>Photo\\tPhoto</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581521058</td>\n",
       "      <td>0FACF6DB63422FC388A2BB4AA1585AB2</td>\n",
       "      <td>4624448</td>\n",
       "      <td>...</td>\n",
       "      <td>32</td>\n",
       "      <td>110</td>\n",
       "      <td>False</td>\n",
       "      <td>1279606330</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>D8B2515734BFAA8902070F3A9C4F6BCD</td>\n",
       "      <td>B72C9FD782AF79BA56755410E3C617FC</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>06D61DCBBE938971E1EA0C38BD9B5446</td>\n",
       "      <td>1581081184</td>\n",
       "      <td>0D01AD1116E22830152CC5FC78CE952D</td>\n",
       "      <td>1873</td>\n",
       "      <td>...</td>\n",
       "      <td>143</td>\n",
       "      <td>119</td>\n",
       "      <td>False</td>\n",
       "      <td>1559584136</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>NaN</td>\n",
       "      <td>C33F498C0F3FE07667B08B834A2FF474</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>06D61DCBBE938971E1EA0C38BD9B5446</td>\n",
       "      <td>1581260062</td>\n",
       "      <td>5B8C4D92F1AE859A2FD69BB405B03347</td>\n",
       "      <td>314</td>\n",
       "      <td>...</td>\n",
       "      <td>457</td>\n",
       "      <td>430</td>\n",
       "      <td>False</td>\n",
       "      <td>1381246972</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1581260483</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>NaN</td>\n",
       "      <td>35359883026EF6D9A31F6C962517A15D</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581368704</td>\n",
       "      <td>418D921524B4A26261D5F24586CCEDD7</td>\n",
       "      <td>43765</td>\n",
       "      <td>...</td>\n",
       "      <td>871</td>\n",
       "      <td>927</td>\n",
       "      <td>False</td>\n",
       "      <td>1359042454</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1581368951</td>\n",
       "      <td>20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>NaN</td>\n",
       "      <td>C95D525FFA8A789ABBA005B9F7BE7174</td>\n",
       "      <td>NaN</td>\n",
       "      <td>AEDE23492BD06619BF93EEB39E83B5E4</td>\n",
       "      <td>8E1AE5ECC4EFDC42F77501ED9AA002ED</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1580950322</td>\n",
       "      <td>9586F04FF09A614BAE30367575053EC4</td>\n",
       "      <td>3418</td>\n",
       "      <td>...</td>\n",
       "      <td>871</td>\n",
       "      <td>927</td>\n",
       "      <td>False</td>\n",
       "      <td>1359042454</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1580962924</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>NaN</td>\n",
       "      <td>3C21DCFB8E3FEC1CB3D2BFB413A78220</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>76B8A9C3013AE6414A3E6012413CDC3B</td>\n",
       "      <td>1581467323</td>\n",
       "      <td>D1AA2C85FA644D64346EDD88470525F2</td>\n",
       "      <td>737</td>\n",
       "      <td>...</td>\n",
       "      <td>131</td>\n",
       "      <td>2105</td>\n",
       "      <td>False</td>\n",
       "      <td>1573978269</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>F9B5807EDB6B215DF96862159A6B4892</td>\n",
       "      <td>C83B3441704D4E6172B86CEF3F66845C</td>\n",
       "      <td>Photo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Quote</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581147754</td>\n",
       "      <td>5611A5DB21B6C40C8E3848A4DECFBB24</td>\n",
       "      <td>77584</td>\n",
       "      <td>...</td>\n",
       "      <td>59</td>\n",
       "      <td>194</td>\n",
       "      <td>False</td>\n",
       "      <td>1352843123</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>NaN</td>\n",
       "      <td>42E7832DAB2B068F63B32B6AD85B3F9D</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581226105</td>\n",
       "      <td>24F43670B7EF0441CCF800C6B77403BC</td>\n",
       "      <td>735882</td>\n",
       "      <td>...</td>\n",
       "      <td>222</td>\n",
       "      <td>205</td>\n",
       "      <td>False</td>\n",
       "      <td>1431360312</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>NaN</td>\n",
       "      <td>C281F4EF6011EFC61BDE3E96216C0A6A</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>A0C7021AD8299ADF0C9EBE326C115F6F</td>\n",
       "      <td>1581182880</td>\n",
       "      <td>1216DCF3AF02393B3139C17C28DDDE02</td>\n",
       "      <td>358</td>\n",
       "      <td>...</td>\n",
       "      <td>222</td>\n",
       "      <td>205</td>\n",
       "      <td>False</td>\n",
       "      <td>1431360312</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1581190910</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>NaN</td>\n",
       "      <td>C7FB1313DB6D1013282343FFA2AF41FD</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581540621</td>\n",
       "      <td>E4B09E5CE7BC5D9FB753CDFCF63ACCC0</td>\n",
       "      <td>471015</td>\n",
       "      <td>...</td>\n",
       "      <td>895</td>\n",
       "      <td>426</td>\n",
       "      <td>False</td>\n",
       "      <td>1271603280</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1581547757</td>\n",
       "      <td>26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>57BDABF6E201A2D0B6C7652379FABEEE</td>\n",
       "      <td>28D9CAE78D1F3E9877A9559757D57293</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1580954292</td>\n",
       "      <td>5A6DCA175E07A222854B7F115C50A3B8</td>\n",
       "      <td>832</td>\n",
       "      <td>...</td>\n",
       "      <td>247</td>\n",
       "      <td>391</td>\n",
       "      <td>False</td>\n",
       "      <td>1292752814</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>1580955948</td>\n",
       "      <td>0</td>\n",
       "      <td>1580955948</td>\n",
       "      <td>27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>57BDABF6E201A2D0B6C7652379FABEEE</td>\n",
       "      <td>558CAF543304741E670682E4F6CDA1CC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581129521</td>\n",
       "      <td>8397BF026AF4CD632EBB10441B55A22F</td>\n",
       "      <td>830</td>\n",
       "      <td>...</td>\n",
       "      <td>248</td>\n",
       "      <td>391</td>\n",
       "      <td>False</td>\n",
       "      <td>1292752814</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>NaN</td>\n",
       "      <td>D2F0691D4B7D3933824640F6AED9D308</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>125C57F4FA6D4E110983FB11B52EFD4E</td>\n",
       "      <td>1581219559</td>\n",
       "      <td>758F32F4069006B21E15EA41BC9EBCEA</td>\n",
       "      <td>3079</td>\n",
       "      <td>...</td>\n",
       "      <td>135</td>\n",
       "      <td>216</td>\n",
       "      <td>False</td>\n",
       "      <td>1566636367</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>1581219633</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>NaN</td>\n",
       "      <td>8AB541DCDB76A5DB2B36A51653D94185</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581370024</td>\n",
       "      <td>398EB2EBB16F9F19AF6F6E736EA3CAB6</td>\n",
       "      <td>300</td>\n",
       "      <td>...</td>\n",
       "      <td>282</td>\n",
       "      <td>283</td>\n",
       "      <td>False</td>\n",
       "      <td>1481197266</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1581370892</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>NaN</td>\n",
       "      <td>3D87CC3655C276F1771752081423B405</td>\n",
       "      <td>NaN</td>\n",
       "      <td>BB422AA00380E45F312FD2CAA75F4960</td>\n",
       "      <td>92D397F8E0F1E77B36B8C612C2C51E23</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1580975391</td>\n",
       "      <td>4DC65AC7BD963DE1F7617C047C33DE99</td>\n",
       "      <td>52366425</td>\n",
       "      <td>...</td>\n",
       "      <td>22</td>\n",
       "      <td>50</td>\n",
       "      <td>False</td>\n",
       "      <td>1340673962</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>DB32BD91C2F1B37BE700F374A07FBC61</td>\n",
       "      <td>3701848B96AA740528A2B0E247777D7D</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2423BA02A75DB2189335DDC3FB6B74A1</td>\n",
       "      <td>6D323BE93766E79BE423FAC5C28BE39B</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581257232</td>\n",
       "      <td>5C671539CB41B9807E209349B101E9FF</td>\n",
       "      <td>988</td>\n",
       "      <td>...</td>\n",
       "      <td>251</td>\n",
       "      <td>719</td>\n",
       "      <td>False</td>\n",
       "      <td>1456473671</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>NaN</td>\n",
       "      <td>6A17A38E3ECDA343C2B147F653C8750E</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>FA3F382BC409C271E3D6EAF8BE4648DD</td>\n",
       "      <td>1581420958</td>\n",
       "      <td>2B4E3136FD6C06BF75052DFDF20CF1DE</td>\n",
       "      <td>2822</td>\n",
       "      <td>...</td>\n",
       "      <td>257</td>\n",
       "      <td>240</td>\n",
       "      <td>False</td>\n",
       "      <td>1509932123</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>EA152CAF6EE7894E914C522BD1B5E702</td>\n",
       "      <td>B8DE8D8CD04864D008EE21081034A9A7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581313083</td>\n",
       "      <td>4A91410143FAC6824BFCADC928251106</td>\n",
       "      <td>12072</td>\n",
       "      <td>...</td>\n",
       "      <td>20</td>\n",
       "      <td>55</td>\n",
       "      <td>False</td>\n",
       "      <td>1573941072</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>1581314362</td>\n",
       "      <td>1581314362</td>\n",
       "      <td>1581314359</td>\n",
       "      <td>34</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>NaN</td>\n",
       "      <td>18176C6AD2871729384062F073CCE94D</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581164292</td>\n",
       "      <td>70B900BE17416923D1E236A38798F202</td>\n",
       "      <td>1228134</td>\n",
       "      <td>...</td>\n",
       "      <td>18</td>\n",
       "      <td>58</td>\n",
       "      <td>False</td>\n",
       "      <td>1378427564</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1581305364</td>\n",
       "      <td>35</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>NaN</td>\n",
       "      <td>D42E2D62B7F6B940E6282495A444DB4E</td>\n",
       "      <td>Photo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>ECED8A16BE2A5E8871FD55F4842F16B1</td>\n",
       "      <td>1581197829</td>\n",
       "      <td>C9A9E1DE113C10893F85C4F3C6DBED1C</td>\n",
       "      <td>192</td>\n",
       "      <td>...</td>\n",
       "      <td>94</td>\n",
       "      <td>130</td>\n",
       "      <td>False</td>\n",
       "      <td>1538249361</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1581231721</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>NaN</td>\n",
       "      <td>10C2BF422C4605FF55FE23B8C92A2973</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>06D61DCBBE938971E1EA0C38BD9B5446</td>\n",
       "      <td>1581311592</td>\n",
       "      <td>CE89F7F296A0F73D55E2CC0922AAF80D</td>\n",
       "      <td>535</td>\n",
       "      <td>...</td>\n",
       "      <td>308</td>\n",
       "      <td>368</td>\n",
       "      <td>False</td>\n",
       "      <td>1436762988</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>NaN</td>\n",
       "      <td>AF11AF01F842E7F120667B7B0B38676D</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Quote</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581233650</td>\n",
       "      <td>E94C0E9E8494F3D603F9D1A5C5242E3D</td>\n",
       "      <td>73</td>\n",
       "      <td>...</td>\n",
       "      <td>268</td>\n",
       "      <td>526</td>\n",
       "      <td>False</td>\n",
       "      <td>1252294800</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>38</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>12CC826EEC2C231DF77F499DCFDDE7DD</td>\n",
       "      <td>4A8129618E3D06342F5BB3A0987E61F0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>B9175601E87101A984A50F8A62A1C374</td>\n",
       "      <td>1581269998</td>\n",
       "      <td>1F2AAAFD6E725A4652D9A63D9C35997C</td>\n",
       "      <td>735</td>\n",
       "      <td>...</td>\n",
       "      <td>538</td>\n",
       "      <td>1538</td>\n",
       "      <td>False</td>\n",
       "      <td>1483700135</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1581270155</td>\n",
       "      <td>39</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>40 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                             hashtags  \\\n",
       "0                                                 NaN   \n",
       "1   83D6C79F5FCEC8D1CAD9E82C2C261611\\tFFAD2DCF664C...   \n",
       "2                                                 NaN   \n",
       "3                                                 NaN   \n",
       "4                                                 NaN   \n",
       "5                    D20331F40622336B266EC2BF3572F7E5   \n",
       "6                                                 NaN   \n",
       "7                                                 NaN   \n",
       "8                                                 NaN   \n",
       "9                                                 NaN   \n",
       "10                   D545FDCEEE2C82E21EBBE23505173FA0   \n",
       "11                                                NaN   \n",
       "12                   40A23C9DE38F5B42FDABD7DE6B73AC6E   \n",
       "13                                                NaN   \n",
       "14  C321B459658CB455C53F2DE7C235A706\\t34A45F5ED2E4...   \n",
       "15                                                NaN   \n",
       "16                   56402281D4D3E8F951CB23AC700A8F5C   \n",
       "17  805C7A95714ADBA122FF5F55C9C9C8E4\\t6372CE4C7D66...   \n",
       "18                   D8B2515734BFAA8902070F3A9C4F6BCD   \n",
       "19                                                NaN   \n",
       "20                                                NaN   \n",
       "21                                                NaN   \n",
       "22                                                NaN   \n",
       "23                   F9B5807EDB6B215DF96862159A6B4892   \n",
       "24                                                NaN   \n",
       "25                                                NaN   \n",
       "26                                                NaN   \n",
       "27                   57BDABF6E201A2D0B6C7652379FABEEE   \n",
       "28                   57BDABF6E201A2D0B6C7652379FABEEE   \n",
       "29                                                NaN   \n",
       "30                                                NaN   \n",
       "31                                                NaN   \n",
       "32                   DB32BD91C2F1B37BE700F374A07FBC61   \n",
       "33                                                NaN   \n",
       "34                   EA152CAF6EE7894E914C522BD1B5E702   \n",
       "35                                                NaN   \n",
       "36                                                NaN   \n",
       "37                                                NaN   \n",
       "38                                                NaN   \n",
       "39                   12CC826EEC2C231DF77F499DCFDDE7DD   \n",
       "\n",
       "                            tweet_id                media  \\\n",
       "0   E7D6C5094767223F6F8789A87A1937AB                  NaN   \n",
       "1   129F4A868712BA2B98D31AF98C3066E4                  NaN   \n",
       "2   04C6C2175852CDBBC23B2446C7E7C22D                  NaN   \n",
       "3   168157826315514C120494D4DF8E6216                  NaN   \n",
       "4   B3E3673782A69D9D8A45D3B222F0B073                Photo   \n",
       "5   AB21A06B694D637075F1EA4F89A05197                Video   \n",
       "6   166C053A658691172A7A3CB20D8FB614  Photo\\tPhoto\\tPhoto   \n",
       "7   C6016D70FDDAF88BB64B00600B48F788                  NaN   \n",
       "8   293740496A195D5B20DBE00C3AEFFF17                Video   \n",
       "9   3D89E8BE2E330DA8DD754D58EA07E824                Video   \n",
       "10  311EDE393CEBB5E880F5B3A96A69AA94                Photo   \n",
       "11  B9C4540F4798A61C1F17CEB3AE369735         Photo\\tPhoto   \n",
       "12  EC2126D5DB025A6C66BC24A5596EC475                  NaN   \n",
       "13  10932CB9E641857BACF9A49D267A8E98                  NaN   \n",
       "14  86E049967C94CF82BA9EB17A2EE4F3CE  Photo\\tPhoto\\tPhoto   \n",
       "15  222EF495AA79DCCC3F32AB9754E9F173                  NaN   \n",
       "16  9CF609C0D9D099F7C09B52976F5029F7                  NaN   \n",
       "17  6601197539548AAD5C9CB9FCD162A5C7         Photo\\tPhoto   \n",
       "18  B72C9FD782AF79BA56755410E3C617FC                Video   \n",
       "19  C33F498C0F3FE07667B08B834A2FF474                Video   \n",
       "20  35359883026EF6D9A31F6C962517A15D                  NaN   \n",
       "21  C95D525FFA8A789ABBA005B9F7BE7174                  NaN   \n",
       "22  3C21DCFB8E3FEC1CB3D2BFB413A78220                Video   \n",
       "23  C83B3441704D4E6172B86CEF3F66845C                Photo   \n",
       "24  42E7832DAB2B068F63B32B6AD85B3F9D                  NaN   \n",
       "25  C281F4EF6011EFC61BDE3E96216C0A6A                  NaN   \n",
       "26  C7FB1313DB6D1013282343FFA2AF41FD                Video   \n",
       "27  28D9CAE78D1F3E9877A9559757D57293                  NaN   \n",
       "28  558CAF543304741E670682E4F6CDA1CC                  NaN   \n",
       "29  D2F0691D4B7D3933824640F6AED9D308                  NaN   \n",
       "30  8AB541DCDB76A5DB2B36A51653D94185                  NaN   \n",
       "31  3D87CC3655C276F1771752081423B405                  NaN   \n",
       "32  3701848B96AA740528A2B0E247777D7D                  NaN   \n",
       "33  6A17A38E3ECDA343C2B147F653C8750E                  NaN   \n",
       "34  B8DE8D8CD04864D008EE21081034A9A7                  NaN   \n",
       "35  18176C6AD2871729384062F073CCE94D                Video   \n",
       "36  D42E2D62B7F6B940E6282495A444DB4E                Photo   \n",
       "37  10C2BF422C4605FF55FE23B8C92A2973                  NaN   \n",
       "38  AF11AF01F842E7F120667B7B0B38676D                  NaN   \n",
       "39  4A8129618E3D06342F5BB3A0987E61F0                  NaN   \n",
       "\n",
       "                               links                           domains  \\\n",
       "0                                NaN                               NaN   \n",
       "1                                NaN                               NaN   \n",
       "2   DDFFB4C01DB85921C3580F614575AA6D  BE4539C53C53FFABCFD232DB100C792B   \n",
       "3                                NaN                               NaN   \n",
       "4                                NaN                               NaN   \n",
       "5                                NaN                               NaN   \n",
       "6                                NaN                               NaN   \n",
       "7                                NaN                               NaN   \n",
       "8                                NaN                               NaN   \n",
       "9                                NaN                               NaN   \n",
       "10                               NaN                               NaN   \n",
       "11                               NaN                               NaN   \n",
       "12                               NaN                               NaN   \n",
       "13  5E671E810416A48B15C2C03D66306322  3330516EAADFD093A1C7B9DA83172DEB   \n",
       "14                               NaN                               NaN   \n",
       "15                               NaN                               NaN   \n",
       "16                               NaN                               NaN   \n",
       "17                               NaN                               NaN   \n",
       "18                               NaN                               NaN   \n",
       "19                               NaN                               NaN   \n",
       "20                               NaN                               NaN   \n",
       "21  AEDE23492BD06619BF93EEB39E83B5E4  8E1AE5ECC4EFDC42F77501ED9AA002ED   \n",
       "22                               NaN                               NaN   \n",
       "23                               NaN                               NaN   \n",
       "24                               NaN                               NaN   \n",
       "25                               NaN                               NaN   \n",
       "26                               NaN                               NaN   \n",
       "27                               NaN                               NaN   \n",
       "28                               NaN                               NaN   \n",
       "29                               NaN                               NaN   \n",
       "30                               NaN                               NaN   \n",
       "31  BB422AA00380E45F312FD2CAA75F4960  92D397F8E0F1E77B36B8C612C2C51E23   \n",
       "32  2423BA02A75DB2189335DDC3FB6B74A1  6D323BE93766E79BE423FAC5C28BE39B   \n",
       "33                               NaN                               NaN   \n",
       "34                               NaN                               NaN   \n",
       "35                               NaN                               NaN   \n",
       "36                               NaN                               NaN   \n",
       "37                               NaN                               NaN   \n",
       "38                               NaN                               NaN   \n",
       "39                               NaN                               NaN   \n",
       "\n",
       "   tweet_type                          language   timestamp  \\\n",
       "0    TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1581262691   \n",
       "1     Retweet  22C448FF81263D4BAF2A176145EE9EAD  1581497241   \n",
       "2    TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1580978528   \n",
       "3     Retweet  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581321849   \n",
       "4    TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1580956787   \n",
       "5    TopLevel  167115458A0DBDFF7E9C0C53A83BAC9B  1581341389   \n",
       "6    TopLevel  ECED8A16BE2A5E8871FD55F4842F16B1  1581004518   \n",
       "7       Quote  ECED8A16BE2A5E8871FD55F4842F16B1  1581020186   \n",
       "8     Retweet  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581005860   \n",
       "9    TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581103914   \n",
       "10   TopLevel  4DC22C3F31C5C43721E6B5815A595ED6  1581187934   \n",
       "11   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1580959419   \n",
       "12      Quote  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581043204   \n",
       "13   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581362695   \n",
       "14   TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1581062422   \n",
       "15      Quote  ECED8A16BE2A5E8871FD55F4842F16B1  1581197081   \n",
       "16   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581017422   \n",
       "17   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581521058   \n",
       "18    Retweet  06D61DCBBE938971E1EA0C38BD9B5446  1581081184   \n",
       "19    Retweet  06D61DCBBE938971E1EA0C38BD9B5446  1581260062   \n",
       "20   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581368704   \n",
       "21   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1580950322   \n",
       "22    Retweet  76B8A9C3013AE6414A3E6012413CDC3B  1581467323   \n",
       "23      Quote  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581147754   \n",
       "24    Retweet  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581226105   \n",
       "25    Retweet  A0C7021AD8299ADF0C9EBE326C115F6F  1581182880   \n",
       "26   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581540621   \n",
       "27   TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1580954292   \n",
       "28   TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1581129521   \n",
       "29    Retweet  125C57F4FA6D4E110983FB11B52EFD4E  1581219559   \n",
       "30   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581370024   \n",
       "31   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1580975391   \n",
       "32    Retweet  22C448FF81263D4BAF2A176145EE9EAD  1581257232   \n",
       "33    Retweet  FA3F382BC409C271E3D6EAF8BE4648DD  1581420958   \n",
       "34    Retweet  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581313083   \n",
       "35   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581164292   \n",
       "36   TopLevel  ECED8A16BE2A5E8871FD55F4842F16B1  1581197829   \n",
       "37    Retweet  06D61DCBBE938971E1EA0C38BD9B5446  1581311592   \n",
       "38      Quote  22C448FF81263D4BAF2A176145EE9EAD  1581233650   \n",
       "39    Retweet  B9175601E87101A984A50F8A62A1C374  1581269998   \n",
       "\n",
       "                           a_user_id  a_follower_count  ...  b_follower_count  \\\n",
       "0   D557B03872EF8986F7F4426AE094B2FE               986  ...                94   \n",
       "1   424822AC982CE0E8965506C63B44EC12              1225  ...              1139   \n",
       "2   1EC14E26417AA926095530AC591BA9CE              3016  ...               780   \n",
       "3   9B9595B6FEB8948BDDF0D222F27E0118              2121  ...                 1   \n",
       "4   525DC99B7CB8F1AC4AD3E66C53FA38E0            813505  ...               171   \n",
       "5   7E1E2FAD93219D0247BDBE451AB343E9             47678  ...              1927   \n",
       "6   A0FD6DF4B4FBF62949708CDB97CC8124             29358  ...               929   \n",
       "7   629A622B84E4C67FAB56DCF0DBD785AA             43097  ...               286   \n",
       "8   AD86A376FA5F26E67263D5FCA8A5BD59               279  ...               461   \n",
       "9   2F236A7D11ECDAF1FC472E9ACC32AE6B           2287014  ...                42   \n",
       "10  85A39142470D65A77BB9E86B054AD321             19401  ...                51   \n",
       "11  0C89F01A8644F9B685BF2225F00EF34B               832  ...                22   \n",
       "12  46097861C196B25F2C56606CD32AE14C            159570  ...                22   \n",
       "13  618642C3684BC6B65F905C289D1F376F           1350529  ...               205   \n",
       "14  49D04A274A357E3039E1DF0F78E2975F             82353  ...                96   \n",
       "15  162984295C1BCFF55D7977B9ADA50AD8             49949  ...                17   \n",
       "16  A702303ED1A25C6DD1393BC0CCABF94A            231537  ...               187   \n",
       "17  0FACF6DB63422FC388A2BB4AA1585AB2           4624448  ...                32   \n",
       "18  0D01AD1116E22830152CC5FC78CE952D              1873  ...               143   \n",
       "19  5B8C4D92F1AE859A2FD69BB405B03347               314  ...               457   \n",
       "20  418D921524B4A26261D5F24586CCEDD7             43765  ...               871   \n",
       "21  9586F04FF09A614BAE30367575053EC4              3418  ...               871   \n",
       "22  D1AA2C85FA644D64346EDD88470525F2               737  ...               131   \n",
       "23  5611A5DB21B6C40C8E3848A4DECFBB24             77584  ...                59   \n",
       "24  24F43670B7EF0441CCF800C6B77403BC            735882  ...               222   \n",
       "25  1216DCF3AF02393B3139C17C28DDDE02               358  ...               222   \n",
       "26  E4B09E5CE7BC5D9FB753CDFCF63ACCC0            471015  ...               895   \n",
       "27  5A6DCA175E07A222854B7F115C50A3B8               832  ...               247   \n",
       "28  8397BF026AF4CD632EBB10441B55A22F               830  ...               248   \n",
       "29  758F32F4069006B21E15EA41BC9EBCEA              3079  ...               135   \n",
       "30  398EB2EBB16F9F19AF6F6E736EA3CAB6               300  ...               282   \n",
       "31  4DC65AC7BD963DE1F7617C047C33DE99          52366425  ...                22   \n",
       "32  5C671539CB41B9807E209349B101E9FF               988  ...               251   \n",
       "33  2B4E3136FD6C06BF75052DFDF20CF1DE              2822  ...               257   \n",
       "34  4A91410143FAC6824BFCADC928251106             12072  ...                20   \n",
       "35  70B900BE17416923D1E236A38798F202           1228134  ...                18   \n",
       "36  C9A9E1DE113C10893F85C4F3C6DBED1C               192  ...                94   \n",
       "37  CE89F7F296A0F73D55E2CC0922AAF80D               535  ...               308   \n",
       "38  E94C0E9E8494F3D603F9D1A5C5242E3D                73  ...               268   \n",
       "39  1F2AAAFD6E725A4652D9A63D9C35997C               735  ...               538   \n",
       "\n",
       "    b_following_count  b_is_verified b_account_creation  b_follows_a  \\\n",
       "0                 648          False         1478011810        False   \n",
       "1                  46          False         1540395738         True   \n",
       "2                 440          False         1432084055         True   \n",
       "3                  45          False         1534313747        False   \n",
       "4                 388          False         1490166885        False   \n",
       "5                1414          False         1368483885         True   \n",
       "6                 928          False         1577292324         True   \n",
       "7                 524          False         1439811227         True   \n",
       "8                 697          False         1396311956         True   \n",
       "9                 118          False         1295601797        False   \n",
       "10                161          False         1397500466        False   \n",
       "11                251          False         1439637842         True   \n",
       "12                251          False         1439637842        False   \n",
       "13                 86          False         1402661478        False   \n",
       "14                167          False         1562685797        False   \n",
       "15                370          False         1465619079         True   \n",
       "16                397          False         1577798166        False   \n",
       "17                110          False         1279606330        False   \n",
       "18                119          False         1559584136         True   \n",
       "19                430          False         1381246972         True   \n",
       "20                927          False         1359042454         True   \n",
       "21                927          False         1359042454         True   \n",
       "22               2105          False         1573978269        False   \n",
       "23                194          False         1352843123        False   \n",
       "24                205          False         1431360312        False   \n",
       "25                205          False         1431360312        False   \n",
       "26                426          False         1271603280        False   \n",
       "27                391          False         1292752814         True   \n",
       "28                391          False         1292752814         True   \n",
       "29                216          False         1566636367         True   \n",
       "30                283          False         1481197266         True   \n",
       "31                 50          False         1340673962        False   \n",
       "32                719          False         1456473671        False   \n",
       "33                240          False         1509932123        False   \n",
       "34                 55          False         1573941072        False   \n",
       "35                 58          False         1378427564        False   \n",
       "36                130          False         1538249361         True   \n",
       "37                368          False         1436762988         True   \n",
       "38                526          False         1252294800        False   \n",
       "39               1538          False         1483700135        False   \n",
       "\n",
       "         reply     retweet  retweet_comment        like  id  \n",
       "0            0           0                0           0   0  \n",
       "1            0  1581497559                0  1581497622   1  \n",
       "2            0           0                0  1581060554   2  \n",
       "3            0           0                0  1581328518   3  \n",
       "4            0           0                0  1580957807   4  \n",
       "5            0  1581346588                0  1581346588   5  \n",
       "6   1581008849           0                0           0   6  \n",
       "7            0           0                0           0   7  \n",
       "8            0           0                0  1581009248   8  \n",
       "9            0           0                0           0   9  \n",
       "10           0           0                0  1581189873  10  \n",
       "11           0           0                0           0  11  \n",
       "12           0           0                0  1581045318  12  \n",
       "13           0           0                0  1581375276  13  \n",
       "14           0           0                0  1581063697  14  \n",
       "15           0           0                0           0  15  \n",
       "16           0  1581017998                0  1581017998  16  \n",
       "17           0           0                0           0  17  \n",
       "18           0           0                0           0  18  \n",
       "19           0           0                0  1581260483  19  \n",
       "20           0           0                0  1581368951  20  \n",
       "21           0           0                0  1580962924  21  \n",
       "22           0           0                0           0  22  \n",
       "23           0           0                0           0  23  \n",
       "24           0           0                0           0  24  \n",
       "25           0           0                0  1581190910  25  \n",
       "26           0           0                0  1581547757  26  \n",
       "27           0  1580955948                0  1580955948  27  \n",
       "28           0           0                0           0  28  \n",
       "29           0  1581219633                0           0  29  \n",
       "30           0           0                0  1581370892  30  \n",
       "31           0           0                0           0  31  \n",
       "32           0           0                0           0  32  \n",
       "33           0           0                0           0  33  \n",
       "34           0  1581314362       1581314362  1581314359  34  \n",
       "35           0           0                0  1581305364  35  \n",
       "36           0           0                0  1581231721  36  \n",
       "37           0           0                0           0  37  \n",
       "38           0           0                0           0  38  \n",
       "39           0           0                0  1581270155  39  \n",
       "\n",
       "[40 rows x 24 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['id']   = np.arange( df.shape[0] )\n",
    "df['id']   = df['id'].astype(np.uint32)\n",
    "\n",
    "df['reply']   = df['reply'].fillna(0)\n",
    "df['retweet'] = df['retweet'].fillna(0)\n",
    "df['retweet_comment'] = df['retweet_comment'].fillna(0)\n",
    "df['like']    = df['like'].fillna(0)\n",
    "\n",
    "df['reply']   = df['reply'].astype(np.uint32)\n",
    "df['retweet'] = df['retweet'].astype(np.uint32)\n",
    "df['retweet_comment'] = df['retweet_comment'].astype(np.uint32)\n",
    "df['like']    = df['like'].astype(np.uint32)\n",
    "\n",
    "df.head(40)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "hashtags              object\n",
       "tweet_id              object\n",
       "media                 object\n",
       "links                 object\n",
       "domains               object\n",
       "tweet_type            object\n",
       "language              object\n",
       "timestamp             uint32\n",
       "a_user_id             object\n",
       "a_follower_count      uint32\n",
       "a_following_count     uint32\n",
       "a_is_verified           bool\n",
       "a_account_creation    uint32\n",
       "b_user_id             object\n",
       "b_follower_count      uint32\n",
       "b_following_count     uint32\n",
       "b_is_verified           bool\n",
       "b_account_creation    uint32\n",
       "b_follows_a             bool\n",
       "reply                 uint32\n",
       "retweet               uint32\n",
       "retweet_comment       uint32\n",
       "like                  uint32\n",
       "id                    uint32\n",
       "dtype: object"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['timestamp']         = df['timestamp'].astype( np.uint32 )\n",
    "df['a_follower_count']  = df['a_follower_count'].astype( np.uint32 )\n",
    "df['a_following_count'] = df['a_following_count'].astype( np.uint32 )\n",
    "df['a_account_creation']= df['a_account_creation'].astype( np.uint32 )\n",
    "df['b_follower_count']  = df['b_follower_count'].astype( np.uint32 )\n",
    "df['b_following_count'] = df['b_following_count'].astype( np.uint32 )\n",
    "df['b_account_creation']= df['b_account_creation'].astype( np.uint32 )\n",
    "gc.collect()\n",
    "\n",
    "df.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(12434735, 20)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dv = pd.read_csv('../input/val.tsv', sep='\\x01', header=None, dtype=DTYPES )\n",
    "gc.collect()\n",
    "dv.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>hashtags</th>\n",
       "      <th>tweet_id</th>\n",
       "      <th>media</th>\n",
       "      <th>links</th>\n",
       "      <th>domains</th>\n",
       "      <th>tweet_type</th>\n",
       "      <th>language</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>a_user_id</th>\n",
       "      <th>a_follower_count</th>\n",
       "      <th>a_following_count</th>\n",
       "      <th>a_is_verified</th>\n",
       "      <th>a_account_creation</th>\n",
       "      <th>b_user_id</th>\n",
       "      <th>b_follower_count</th>\n",
       "      <th>b_following_count</th>\n",
       "      <th>b_is_verified</th>\n",
       "      <th>b_account_creation</th>\n",
       "      <th>b_follows_a</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>7647B4E9DAF4C1D8973397DC2A04F3E3</td>\n",
       "      <td>Photo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581703126</td>\n",
       "      <td>8A9AB92B775C62C4AB60DF6773A01571</td>\n",
       "      <td>13941</td>\n",
       "      <td>1216</td>\n",
       "      <td>False</td>\n",
       "      <td>1448292186</td>\n",
       "      <td>0000006C3074607050F1339DDCB890BB</td>\n",
       "      <td>27448</td>\n",
       "      <td>600</td>\n",
       "      <td>False</td>\n",
       "      <td>1520948869</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>408DB1803264B5FF55F73EC06BE9BD77</td>\n",
       "      <td>Photo\\tPhoto\\tPhoto\\tPhoto</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>B9175601E87101A984A50F8A62A1C374</td>\n",
       "      <td>1582021842</td>\n",
       "      <td>825B003667DF15692B1704F44DAFD1A2</td>\n",
       "      <td>186</td>\n",
       "      <td>100</td>\n",
       "      <td>False</td>\n",
       "      <td>1263078566</td>\n",
       "      <td>000013315386492275CCBF7AEF293EF0</td>\n",
       "      <td>139</td>\n",
       "      <td>956</td>\n",
       "      <td>False</td>\n",
       "      <td>1335110299</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2EE951379C47E8BF62EABB8FA027F753</td>\n",
       "      <td>GIF</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>2996EB2FE8162C076D070A4C8D6532CD</td>\n",
       "      <td>1581734918</td>\n",
       "      <td>2DEC09796D0EB473244CB7A4FB13A0A0</td>\n",
       "      <td>250470</td>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "      <td>1356488269</td>\n",
       "      <td>00001569CB28972FC8173122D9DA162F</td>\n",
       "      <td>16</td>\n",
       "      <td>97</td>\n",
       "      <td>False</td>\n",
       "      <td>1503940711</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2135F24B05DAE3EF213F9CE80FDC6DAF</td>\n",
       "      <td>Photo\\tPhoto\\tPhoto</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581913613</td>\n",
       "      <td>487BCB8F1677A6D9EEE612940410CBB6</td>\n",
       "      <td>516</td>\n",
       "      <td>406</td>\n",
       "      <td>False</td>\n",
       "      <td>1449096567</td>\n",
       "      <td>00001607209C5774DF9207A2AC0EED5F</td>\n",
       "      <td>460</td>\n",
       "      <td>693</td>\n",
       "      <td>False</td>\n",
       "      <td>1396311956</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>09143FEDE9BD494A6EA9A7EE160565E3</td>\n",
       "      <td>Photo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581565745</td>\n",
       "      <td>C85C39DC8D279E51DF1BBF0391E5EB2E</td>\n",
       "      <td>19576</td>\n",
       "      <td>273</td>\n",
       "      <td>True</td>\n",
       "      <td>1236181798</td>\n",
       "      <td>0000177705514C315F2FC6DFA3872712</td>\n",
       "      <td>468</td>\n",
       "      <td>3837</td>\n",
       "      <td>False</td>\n",
       "      <td>1298646801</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>NaN</td>\n",
       "      <td>60968762145D2AF58A58AFB376B2B00C</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581964037</td>\n",
       "      <td>D0A3A1BDAF414BE91D6D482A021614CC</td>\n",
       "      <td>14839</td>\n",
       "      <td>4679</td>\n",
       "      <td>False</td>\n",
       "      <td>1451365323</td>\n",
       "      <td>00001BC70532632181F17B2A65EFD2BA</td>\n",
       "      <td>118</td>\n",
       "      <td>959</td>\n",
       "      <td>False</td>\n",
       "      <td>1424527389</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>AC6D3435D9553482AD3DE25576B85B57\\t699C2389AC28...</td>\n",
       "      <td>706310D7975C15B9FB1FA3FBAE8A126B</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>48236EC80FDDDFADE99420ABC9210DDF</td>\n",
       "      <td>1582130362</td>\n",
       "      <td>89970E5689082BE57C4D3EEEE32C93F1</td>\n",
       "      <td>184674</td>\n",
       "      <td>119</td>\n",
       "      <td>False</td>\n",
       "      <td>1469218273</td>\n",
       "      <td>00001F56CDCF81D2EF635B3C0EDE57EB</td>\n",
       "      <td>52</td>\n",
       "      <td>162</td>\n",
       "      <td>False</td>\n",
       "      <td>1397500466</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>NaN</td>\n",
       "      <td>DBC37B8C8DC70C70F588D37CC5006ABB</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581836693</td>\n",
       "      <td>66C2B62912A4B5F43CA870F1BE7D9B5D</td>\n",
       "      <td>735185</td>\n",
       "      <td>465</td>\n",
       "      <td>False</td>\n",
       "      <td>1388514185</td>\n",
       "      <td>00001F56CDCF81D2EF635B3C0EDE57EB</td>\n",
       "      <td>51</td>\n",
       "      <td>161</td>\n",
       "      <td>False</td>\n",
       "      <td>1397500466</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>NaN</td>\n",
       "      <td>BA7917AA4B620B13264A68F455203934</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581806111</td>\n",
       "      <td>0C2C7F0E7FBA8F4EB65DBB58444DF0A7</td>\n",
       "      <td>174082</td>\n",
       "      <td>76</td>\n",
       "      <td>False</td>\n",
       "      <td>1435544008</td>\n",
       "      <td>0000376314CAC0A3E9D4FCF4A29004D6</td>\n",
       "      <td>358</td>\n",
       "      <td>619</td>\n",
       "      <td>False</td>\n",
       "      <td>1265686239</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>NaN</td>\n",
       "      <td>60DD856C81BC0A115365FA556672C2DB</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581759166</td>\n",
       "      <td>44B7E6CBC9D0431AE935F2FB6DECB791</td>\n",
       "      <td>4167</td>\n",
       "      <td>4351</td>\n",
       "      <td>False</td>\n",
       "      <td>1384335793</td>\n",
       "      <td>000043D9A730DF47697D0750F509B56A</td>\n",
       "      <td>924</td>\n",
       "      <td>931</td>\n",
       "      <td>False</td>\n",
       "      <td>1359042454</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>NaN</td>\n",
       "      <td>CFBD0716FB1FE99692156EC937E598B7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>4DC22C3F31C5C43721E6B5815A595ED6</td>\n",
       "      <td>1581616980</td>\n",
       "      <td>D712F0607FA3F86B1EB405CB7CD1C48B</td>\n",
       "      <td>845016</td>\n",
       "      <td>635502</td>\n",
       "      <td>False</td>\n",
       "      <td>1339918259</td>\n",
       "      <td>00004E42009644A7647E8C988C072D9D</td>\n",
       "      <td>1242</td>\n",
       "      <td>1072</td>\n",
       "      <td>False</td>\n",
       "      <td>1497979462</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97B2B88F6A4C30F67AD0868F8EBE5986</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1582143240</td>\n",
       "      <td>D84B57C9B2077C327D5373C6C7795AE6</td>\n",
       "      <td>10255</td>\n",
       "      <td>7985</td>\n",
       "      <td>False</td>\n",
       "      <td>1416142943</td>\n",
       "      <td>000052B88E27886C4DAEF585806A67BC</td>\n",
       "      <td>1136</td>\n",
       "      <td>1172</td>\n",
       "      <td>False</td>\n",
       "      <td>1520674893</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>NaN</td>\n",
       "      <td>7FC54E66347A4EDC2965E895F8BE0E14</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1582063800</td>\n",
       "      <td>8C10014037651CB5465005BD522FEFF2</td>\n",
       "      <td>1460017</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>1373488947</td>\n",
       "      <td>00005A197A6EDCF7203D74FE49D1977A</td>\n",
       "      <td>6</td>\n",
       "      <td>140</td>\n",
       "      <td>False</td>\n",
       "      <td>1556836074</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>F0F2FBE57F08E7C4326682B5EDA63E3E\\tE78674D32346...</td>\n",
       "      <td>00E5688F5726E0CA26ABA4DDDEE9C094</td>\n",
       "      <td>NaN</td>\n",
       "      <td>35EBE21A70710541AB4414269C370A53</td>\n",
       "      <td>C0F5328C1D36CD4B34D1808012E18D46</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581602924</td>\n",
       "      <td>6FF3BD9659F59DFB294E421BC5513CBF</td>\n",
       "      <td>385</td>\n",
       "      <td>404</td>\n",
       "      <td>False</td>\n",
       "      <td>1482408524</td>\n",
       "      <td>000061D27D4B361292B89ABE39C55B73</td>\n",
       "      <td>147</td>\n",
       "      <td>143</td>\n",
       "      <td>False</td>\n",
       "      <td>1449319908</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>NaN</td>\n",
       "      <td>F5A025A1FD812FF90953F7BF674DB41A</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581973984</td>\n",
       "      <td>8B5B7C25946593F5F891AF46BE8F43FF</td>\n",
       "      <td>12338</td>\n",
       "      <td>62</td>\n",
       "      <td>False</td>\n",
       "      <td>1570718917</td>\n",
       "      <td>000062A7E0F2A45147C9BB597B0EDE89</td>\n",
       "      <td>1630</td>\n",
       "      <td>4050</td>\n",
       "      <td>False</td>\n",
       "      <td>1396775852</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>CA9E12A4233C5D315168FCFCDB6CCF00\\t50E71F214FFC...</td>\n",
       "      <td>6E81940D786F4D4350556B769066BE90</td>\n",
       "      <td>Photo\\tPhoto\\tPhoto</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>06D61DCBBE938971E1EA0C38BD9B5446</td>\n",
       "      <td>1581873200</td>\n",
       "      <td>993E9663A49A23C484EA7D7272A7F2FF</td>\n",
       "      <td>360326</td>\n",
       "      <td>249</td>\n",
       "      <td>False</td>\n",
       "      <td>1282593651</td>\n",
       "      <td>00006B88F1B1F669C69E886F71FA0562</td>\n",
       "      <td>145</td>\n",
       "      <td>259</td>\n",
       "      <td>False</td>\n",
       "      <td>1256517973</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>NaN</td>\n",
       "      <td>8F52DAF4FE28E882318DBEB31CE44D8D</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581568465</td>\n",
       "      <td>C745D6C6DE8C47A539B49378D532A3A4</td>\n",
       "      <td>1074</td>\n",
       "      <td>683</td>\n",
       "      <td>False</td>\n",
       "      <td>1396672637</td>\n",
       "      <td>00006C241A4CC8493E9A2B7A8B8BAFC4</td>\n",
       "      <td>819</td>\n",
       "      <td>1139</td>\n",
       "      <td>False</td>\n",
       "      <td>1556505243</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>NaN</td>\n",
       "      <td>A33EF77E363D590FFB25EBC63C30F9EF</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581908521</td>\n",
       "      <td>16729110BA2AC2C2B188689DC30CCF77</td>\n",
       "      <td>2039</td>\n",
       "      <td>1889</td>\n",
       "      <td>False</td>\n",
       "      <td>1411598977</td>\n",
       "      <td>00007598E2C7054842F773BDB77325B1</td>\n",
       "      <td>4</td>\n",
       "      <td>44</td>\n",
       "      <td>False</td>\n",
       "      <td>1579229979</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>NaN</td>\n",
       "      <td>6A1C257A09B245C3281F0EB4B775FB76</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>167115458A0DBDFF7E9C0C53A83BAC9B</td>\n",
       "      <td>1582069091</td>\n",
       "      <td>C4FB3BD25D02330A357D454F31819744</td>\n",
       "      <td>111701</td>\n",
       "      <td>67138</td>\n",
       "      <td>False</td>\n",
       "      <td>1308230601</td>\n",
       "      <td>000076B7463DD70EA33BA32BC7EF2183</td>\n",
       "      <td>302</td>\n",
       "      <td>287</td>\n",
       "      <td>False</td>\n",
       "      <td>1330007601</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>NaN</td>\n",
       "      <td>6BB7C06FF8F57FE651787E7A495D3A9B</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581960451</td>\n",
       "      <td>830F0D746C5E837159B4327140DCBF09</td>\n",
       "      <td>1370</td>\n",
       "      <td>3157</td>\n",
       "      <td>False</td>\n",
       "      <td>1375455555</td>\n",
       "      <td>00007745A6EE969F1A0F44B10DC17671</td>\n",
       "      <td>269</td>\n",
       "      <td>527</td>\n",
       "      <td>False</td>\n",
       "      <td>1252294800</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>NaN</td>\n",
       "      <td>8FEC9E259D090026AB5945CBAEE94B10</td>\n",
       "      <td>Photo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1582137224</td>\n",
       "      <td>7D90F1B4EFC10722F425B02F7DED598E</td>\n",
       "      <td>630</td>\n",
       "      <td>473</td>\n",
       "      <td>False</td>\n",
       "      <td>1359141957</td>\n",
       "      <td>00007F3FBC7BCD812E299A58BEE6E85A</td>\n",
       "      <td>674</td>\n",
       "      <td>467</td>\n",
       "      <td>False</td>\n",
       "      <td>1327286025</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>NaN</td>\n",
       "      <td>F851EF690616C54B8349DF40DC6709F3</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1582117014</td>\n",
       "      <td>1C19F315F22297759D1198F302C9219A</td>\n",
       "      <td>4111</td>\n",
       "      <td>96</td>\n",
       "      <td>False</td>\n",
       "      <td>1462695428</td>\n",
       "      <td>00007F3FBC7BCD812E299A58BEE6E85A</td>\n",
       "      <td>674</td>\n",
       "      <td>467</td>\n",
       "      <td>False</td>\n",
       "      <td>1327286025</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>NaN</td>\n",
       "      <td>F9D6DE090E58378A1D56477D6CF5900B</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581589066</td>\n",
       "      <td>2F3DBE5E912F01E4ED6AA236D5C04F3B</td>\n",
       "      <td>412</td>\n",
       "      <td>271</td>\n",
       "      <td>False</td>\n",
       "      <td>1437576799</td>\n",
       "      <td>0000832375EA08BC57FF1BE30E0DAFC8</td>\n",
       "      <td>39</td>\n",
       "      <td>107</td>\n",
       "      <td>False</td>\n",
       "      <td>1545357629</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>NaN</td>\n",
       "      <td>29A3ABC56F75F6AFFE2DA7408ECA2074</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1C38BCF4FF3435C61053A32BB5FB8DAF</td>\n",
       "      <td>0DFD12E5919EF4958D51151C81133A26</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581888390</td>\n",
       "      <td>2265A0FF61C8C4C560CFFF5DBDCD59B9</td>\n",
       "      <td>270611</td>\n",
       "      <td>1079</td>\n",
       "      <td>True</td>\n",
       "      <td>1237854816</td>\n",
       "      <td>0000838F8591DDDCEA1B0E959AE8A20D</td>\n",
       "      <td>1829</td>\n",
       "      <td>1546</td>\n",
       "      <td>False</td>\n",
       "      <td>1342793182</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>NaN</td>\n",
       "      <td>458E0A1F2481688C8FAD9716CBB1641C</td>\n",
       "      <td>NaN</td>\n",
       "      <td>FF14005C53E2BB1331B4899425B78D36</td>\n",
       "      <td>9A7DD49AFF669B1963E845647F41FD70</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581678610</td>\n",
       "      <td>6F541AE941159024B739E8871BC28D7E</td>\n",
       "      <td>10422012</td>\n",
       "      <td>381</td>\n",
       "      <td>True</td>\n",
       "      <td>1172656645</td>\n",
       "      <td>0000838F8591DDDCEA1B0E959AE8A20D</td>\n",
       "      <td>1826</td>\n",
       "      <td>1545</td>\n",
       "      <td>False</td>\n",
       "      <td>1342793182</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>NaN</td>\n",
       "      <td>94B5AA4D33F00ED7292F31E6F38F21B0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581630135</td>\n",
       "      <td>711CEB76A207FF3DDFF55D0F2D67BCD8</td>\n",
       "      <td>63449</td>\n",
       "      <td>49295</td>\n",
       "      <td>False</td>\n",
       "      <td>1302883163</td>\n",
       "      <td>000085C3AE598387700C7C7D71FF9022</td>\n",
       "      <td>826</td>\n",
       "      <td>3128</td>\n",
       "      <td>False</td>\n",
       "      <td>1228574186</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>NaN</td>\n",
       "      <td>E075D594680BAC15322782C1F035724B</td>\n",
       "      <td>Photo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581941935</td>\n",
       "      <td>002700F152911530B4D2583796333AB5</td>\n",
       "      <td>34152</td>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "      <td>1564771226</td>\n",
       "      <td>000085C56613B04BE959892220AC08BD</td>\n",
       "      <td>363</td>\n",
       "      <td>858</td>\n",
       "      <td>False</td>\n",
       "      <td>1425333217</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>NaN</td>\n",
       "      <td>E24B6F9115F9FC533698AD5D27489494</td>\n",
       "      <td>Photo\\tPhoto\\tPhoto</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>06D61DCBBE938971E1EA0C38BD9B5446</td>\n",
       "      <td>1581635708</td>\n",
       "      <td>5B14F539BC32262C06FEE9D9B1E1B752</td>\n",
       "      <td>1819</td>\n",
       "      <td>1098</td>\n",
       "      <td>False</td>\n",
       "      <td>1561375451</td>\n",
       "      <td>000087B33602940BAA79A05E9A0C4798</td>\n",
       "      <td>992</td>\n",
       "      <td>902</td>\n",
       "      <td>False</td>\n",
       "      <td>1533851297</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>NaN</td>\n",
       "      <td>00964D92CB73D14223572E7D235B32E3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581607378</td>\n",
       "      <td>5A61EDBB0AC0614F57ECCA92C51DBDAA</td>\n",
       "      <td>266403</td>\n",
       "      <td>274</td>\n",
       "      <td>True</td>\n",
       "      <td>1494727150</td>\n",
       "      <td>00008D90EE01B655091318240FA2C500</td>\n",
       "      <td>356</td>\n",
       "      <td>351</td>\n",
       "      <td>False</td>\n",
       "      <td>1416386970</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>NaN</td>\n",
       "      <td>A3235C43ABFCB010F60A6F5B058324D7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Quote</td>\n",
       "      <td>B9175601E87101A984A50F8A62A1C374</td>\n",
       "      <td>1581848711</td>\n",
       "      <td>AB08EE49347A1B688A92F268FBE14A0A</td>\n",
       "      <td>1248</td>\n",
       "      <td>298</td>\n",
       "      <td>False</td>\n",
       "      <td>1572027875</td>\n",
       "      <td>00008E9A2F95C7D0590584CEBF940364</td>\n",
       "      <td>53</td>\n",
       "      <td>78</td>\n",
       "      <td>False</td>\n",
       "      <td>1552761597</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>NaN</td>\n",
       "      <td>BF8B8A72990E532C966217A2B1A3A35F</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>6431A618DCF7F4CB7F62A95A39BAB77A</td>\n",
       "      <td>1581572388</td>\n",
       "      <td>C48C45A9C319C5F6317ABAB64CE35FA0</td>\n",
       "      <td>2061</td>\n",
       "      <td>333</td>\n",
       "      <td>False</td>\n",
       "      <td>1405972421</td>\n",
       "      <td>00008E9A2F95C7D0590584CEBF940364</td>\n",
       "      <td>53</td>\n",
       "      <td>78</td>\n",
       "      <td>False</td>\n",
       "      <td>1552761597</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>NaN</td>\n",
       "      <td>B3A38E79DA2BC05FF237AE1B5F243582</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581669392</td>\n",
       "      <td>D6F5E57A33330FEF4F831ABC7E75F341</td>\n",
       "      <td>41778</td>\n",
       "      <td>2986</td>\n",
       "      <td>False</td>\n",
       "      <td>1230775076</td>\n",
       "      <td>00008EA411ACD86E707FFB0A9BF58C0D</td>\n",
       "      <td>1681</td>\n",
       "      <td>2060</td>\n",
       "      <td>False</td>\n",
       "      <td>1236732673</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98B69D5858773A54C03A14AB2246FBD0</td>\n",
       "      <td>Video</td>\n",
       "      <td>2F330EAF5B53C0D578C9BBD05A4835BE</td>\n",
       "      <td>D7CE0AEFC0103720D70E70EA88EF2D59</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>4DC22C3F31C5C43721E6B5815A595ED6</td>\n",
       "      <td>1581604243</td>\n",
       "      <td>5029E3514B761C00854077B58DF057D0</td>\n",
       "      <td>885777</td>\n",
       "      <td>18</td>\n",
       "      <td>True</td>\n",
       "      <td>1321175705</td>\n",
       "      <td>00009E737CA0B3E06851FD81E8EDC780</td>\n",
       "      <td>305</td>\n",
       "      <td>952</td>\n",
       "      <td>False</td>\n",
       "      <td>1392147575</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>NaN</td>\n",
       "      <td>671D38BC9340D81CDAFF5567B841C2E4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>FDE160FC8A989EC235EA9FFD8CBAB8C1</td>\n",
       "      <td>E91CDEC8DC7ABF30592FA024616FF970</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581617140</td>\n",
       "      <td>0A84FCF785CD487EE831B7EA757962DD</td>\n",
       "      <td>85941</td>\n",
       "      <td>36581</td>\n",
       "      <td>False</td>\n",
       "      <td>1407252198</td>\n",
       "      <td>0000A2FB0CF47CDF6A304DC1915D024F</td>\n",
       "      <td>2271</td>\n",
       "      <td>1256</td>\n",
       "      <td>False</td>\n",
       "      <td>1324227365</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>F08B34EB716D7EBABBA8F468B8677427\\t3163A5EBCEC2...</td>\n",
       "      <td>8C11802A619C9F8FA122E10A80EBDB6B</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581855437</td>\n",
       "      <td>B5E2DD28EF95A0F689D33613838CBDC5</td>\n",
       "      <td>2207882</td>\n",
       "      <td>211</td>\n",
       "      <td>True</td>\n",
       "      <td>1442926734</td>\n",
       "      <td>0000A40A818B1D07A3AD5810F608A302</td>\n",
       "      <td>0</td>\n",
       "      <td>20</td>\n",
       "      <td>False</td>\n",
       "      <td>1573122106</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>6AC5277FBFDFD8B273C06C758E3EC39A</td>\n",
       "      <td>3E01C774B10A10863BBEE9219779F497</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2AA78FE3E6E43DFEFE484EFEE89471AD</td>\n",
       "      <td>A00C631D2C24302736B9ED2DF7A1D20F</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581570536</td>\n",
       "      <td>B2ADD1F975FEDCEAED8925D8104A5A25</td>\n",
       "      <td>346</td>\n",
       "      <td>368</td>\n",
       "      <td>False</td>\n",
       "      <td>1563352662</td>\n",
       "      <td>0000B0CAB9BADF2CC04596E5D543892F</td>\n",
       "      <td>899</td>\n",
       "      <td>657</td>\n",
       "      <td>False</td>\n",
       "      <td>1470487711</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>99CCA670D6FD350E8CF02DA5081D77BE</td>\n",
       "      <td>E42A562F6B5CF4107410B94101B4030C</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1582000109</td>\n",
       "      <td>87988FC74B8AC3AA09B22C266E91D5B0</td>\n",
       "      <td>279</td>\n",
       "      <td>366</td>\n",
       "      <td>False</td>\n",
       "      <td>1506993134</td>\n",
       "      <td>0000B0CAB9BADF2CC04596E5D543892F</td>\n",
       "      <td>906</td>\n",
       "      <td>663</td>\n",
       "      <td>False</td>\n",
       "      <td>1470487711</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>NaN</td>\n",
       "      <td>F8E403D8DB9B3A4133B4547674357C0B</td>\n",
       "      <td>Photo\\tPhoto</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581772220</td>\n",
       "      <td>DD5E285BAF3DB6AD7F8469A036C1371B</td>\n",
       "      <td>1017</td>\n",
       "      <td>733</td>\n",
       "      <td>False</td>\n",
       "      <td>1362153347</td>\n",
       "      <td>0000B0CAB9BADF2CC04596E5D543892F</td>\n",
       "      <td>906</td>\n",
       "      <td>663</td>\n",
       "      <td>False</td>\n",
       "      <td>1470487711</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>NaN</td>\n",
       "      <td>662CCE391FAF1FC63C09EA4CEBB55B79</td>\n",
       "      <td>Photo\\tPhoto</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581791115</td>\n",
       "      <td>6C05D5C53D491D8D7260162B63265C6A</td>\n",
       "      <td>180</td>\n",
       "      <td>273</td>\n",
       "      <td>False</td>\n",
       "      <td>1341334739</td>\n",
       "      <td>0000B30E6FAA5EB99D95D57DB1D61EA5</td>\n",
       "      <td>872</td>\n",
       "      <td>931</td>\n",
       "      <td>False</td>\n",
       "      <td>1302779696</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>NaN</td>\n",
       "      <td>BCB496E82772407E1266471C646BDF55</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581975873</td>\n",
       "      <td>2C3202244A539D79C704197DF064DB7A</td>\n",
       "      <td>7778</td>\n",
       "      <td>147</td>\n",
       "      <td>False</td>\n",
       "      <td>1384470429</td>\n",
       "      <td>0000B53E908F953FDEED69F83BEA7785</td>\n",
       "      <td>149</td>\n",
       "      <td>654</td>\n",
       "      <td>False</td>\n",
       "      <td>1414091523</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                             hashtags  \\\n",
       "0                                                 NaN   \n",
       "1                                                 NaN   \n",
       "2                                                 NaN   \n",
       "3                                                 NaN   \n",
       "4                                                 NaN   \n",
       "5                                                 NaN   \n",
       "6   AC6D3435D9553482AD3DE25576B85B57\\t699C2389AC28...   \n",
       "7                                                 NaN   \n",
       "8                                                 NaN   \n",
       "9                                                 NaN   \n",
       "10                                                NaN   \n",
       "11                                                NaN   \n",
       "12                                                NaN   \n",
       "13  F0F2FBE57F08E7C4326682B5EDA63E3E\\tE78674D32346...   \n",
       "14                                                NaN   \n",
       "15  CA9E12A4233C5D315168FCFCDB6CCF00\\t50E71F214FFC...   \n",
       "16                                                NaN   \n",
       "17                                                NaN   \n",
       "18                                                NaN   \n",
       "19                                                NaN   \n",
       "20                                                NaN   \n",
       "21                                                NaN   \n",
       "22                                                NaN   \n",
       "23                                                NaN   \n",
       "24                                                NaN   \n",
       "25                                                NaN   \n",
       "26                                                NaN   \n",
       "27                                                NaN   \n",
       "28                                                NaN   \n",
       "29                                                NaN   \n",
       "30                                                NaN   \n",
       "31                                                NaN   \n",
       "32                                                NaN   \n",
       "33                                                NaN   \n",
       "34  F08B34EB716D7EBABBA8F468B8677427\\t3163A5EBCEC2...   \n",
       "35                   6AC5277FBFDFD8B273C06C758E3EC39A   \n",
       "36                   99CCA670D6FD350E8CF02DA5081D77BE   \n",
       "37                                                NaN   \n",
       "38                                                NaN   \n",
       "39                                                NaN   \n",
       "\n",
       "                            tweet_id                       media  \\\n",
       "0   7647B4E9DAF4C1D8973397DC2A04F3E3                       Photo   \n",
       "1   408DB1803264B5FF55F73EC06BE9BD77  Photo\\tPhoto\\tPhoto\\tPhoto   \n",
       "2   2EE951379C47E8BF62EABB8FA027F753                         GIF   \n",
       "3   2135F24B05DAE3EF213F9CE80FDC6DAF         Photo\\tPhoto\\tPhoto   \n",
       "4   09143FEDE9BD494A6EA9A7EE160565E3                       Photo   \n",
       "5   60968762145D2AF58A58AFB376B2B00C                         NaN   \n",
       "6   706310D7975C15B9FB1FA3FBAE8A126B                         NaN   \n",
       "7   DBC37B8C8DC70C70F588D37CC5006ABB                         NaN   \n",
       "8   BA7917AA4B620B13264A68F455203934                         NaN   \n",
       "9   60DD856C81BC0A115365FA556672C2DB                         NaN   \n",
       "10  CFBD0716FB1FE99692156EC937E598B7                         NaN   \n",
       "11  97B2B88F6A4C30F67AD0868F8EBE5986                         NaN   \n",
       "12  7FC54E66347A4EDC2965E895F8BE0E14                         NaN   \n",
       "13  00E5688F5726E0CA26ABA4DDDEE9C094                         NaN   \n",
       "14  F5A025A1FD812FF90953F7BF674DB41A                         NaN   \n",
       "15  6E81940D786F4D4350556B769066BE90         Photo\\tPhoto\\tPhoto   \n",
       "16  8F52DAF4FE28E882318DBEB31CE44D8D                         NaN   \n",
       "17  A33EF77E363D590FFB25EBC63C30F9EF                       Video   \n",
       "18  6A1C257A09B245C3281F0EB4B775FB76                         NaN   \n",
       "19  6BB7C06FF8F57FE651787E7A495D3A9B                         NaN   \n",
       "20  8FEC9E259D090026AB5945CBAEE94B10                       Photo   \n",
       "21  F851EF690616C54B8349DF40DC6709F3                       Video   \n",
       "22  F9D6DE090E58378A1D56477D6CF5900B                       Video   \n",
       "23  29A3ABC56F75F6AFFE2DA7408ECA2074                         NaN   \n",
       "24  458E0A1F2481688C8FAD9716CBB1641C                         NaN   \n",
       "25  94B5AA4D33F00ED7292F31E6F38F21B0                         NaN   \n",
       "26  E075D594680BAC15322782C1F035724B                       Photo   \n",
       "27  E24B6F9115F9FC533698AD5D27489494         Photo\\tPhoto\\tPhoto   \n",
       "28  00964D92CB73D14223572E7D235B32E3                         NaN   \n",
       "29  A3235C43ABFCB010F60A6F5B058324D7                         NaN   \n",
       "30  BF8B8A72990E532C966217A2B1A3A35F                         NaN   \n",
       "31  B3A38E79DA2BC05FF237AE1B5F243582                         NaN   \n",
       "32  98B69D5858773A54C03A14AB2246FBD0                       Video   \n",
       "33  671D38BC9340D81CDAFF5567B841C2E4                         NaN   \n",
       "34  8C11802A619C9F8FA122E10A80EBDB6B                         NaN   \n",
       "35  3E01C774B10A10863BBEE9219779F497                         NaN   \n",
       "36  E42A562F6B5CF4107410B94101B4030C                         NaN   \n",
       "37  F8E403D8DB9B3A4133B4547674357C0B                Photo\\tPhoto   \n",
       "38  662CCE391FAF1FC63C09EA4CEBB55B79                Photo\\tPhoto   \n",
       "39  BCB496E82772407E1266471C646BDF55                         NaN   \n",
       "\n",
       "                               links                           domains  \\\n",
       "0                                NaN                               NaN   \n",
       "1                                NaN                               NaN   \n",
       "2                                NaN                               NaN   \n",
       "3                                NaN                               NaN   \n",
       "4                                NaN                               NaN   \n",
       "5                                NaN                               NaN   \n",
       "6                                NaN                               NaN   \n",
       "7                                NaN                               NaN   \n",
       "8                                NaN                               NaN   \n",
       "9                                NaN                               NaN   \n",
       "10                               NaN                               NaN   \n",
       "11                               NaN                               NaN   \n",
       "12                               NaN                               NaN   \n",
       "13  35EBE21A70710541AB4414269C370A53  C0F5328C1D36CD4B34D1808012E18D46   \n",
       "14                               NaN                               NaN   \n",
       "15                               NaN                               NaN   \n",
       "16                               NaN                               NaN   \n",
       "17                               NaN                               NaN   \n",
       "18                               NaN                               NaN   \n",
       "19                               NaN                               NaN   \n",
       "20                               NaN                               NaN   \n",
       "21                               NaN                               NaN   \n",
       "22                               NaN                               NaN   \n",
       "23  1C38BCF4FF3435C61053A32BB5FB8DAF  0DFD12E5919EF4958D51151C81133A26   \n",
       "24  FF14005C53E2BB1331B4899425B78D36  9A7DD49AFF669B1963E845647F41FD70   \n",
       "25                               NaN                               NaN   \n",
       "26                               NaN                               NaN   \n",
       "27                               NaN                               NaN   \n",
       "28                               NaN                               NaN   \n",
       "29                               NaN                               NaN   \n",
       "30                               NaN                               NaN   \n",
       "31                               NaN                               NaN   \n",
       "32  2F330EAF5B53C0D578C9BBD05A4835BE  D7CE0AEFC0103720D70E70EA88EF2D59   \n",
       "33  FDE160FC8A989EC235EA9FFD8CBAB8C1  E91CDEC8DC7ABF30592FA024616FF970   \n",
       "34                               NaN                               NaN   \n",
       "35  2AA78FE3E6E43DFEFE484EFEE89471AD  A00C631D2C24302736B9ED2DF7A1D20F   \n",
       "36                               NaN                               NaN   \n",
       "37                               NaN                               NaN   \n",
       "38                               NaN                               NaN   \n",
       "39                               NaN                               NaN   \n",
       "\n",
       "   tweet_type                          language   timestamp  \\\n",
       "0    TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581703126   \n",
       "1     Retweet  B9175601E87101A984A50F8A62A1C374  1582021842   \n",
       "2    TopLevel  2996EB2FE8162C076D070A4C8D6532CD  1581734918   \n",
       "3     Retweet  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581913613   \n",
       "4    TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581565745   \n",
       "5    TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581964037   \n",
       "6    TopLevel  48236EC80FDDDFADE99420ABC9210DDF  1582130362   \n",
       "7     Retweet  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581836693   \n",
       "8    TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1581806111   \n",
       "9     Retweet  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581759166   \n",
       "10   TopLevel  4DC22C3F31C5C43721E6B5815A595ED6  1581616980   \n",
       "11    Retweet  D3164C7FBCF2565DDF915B1B3AEFB1DC  1582143240   \n",
       "12   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1582063800   \n",
       "13   TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1581602924   \n",
       "14   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581973984   \n",
       "15   TopLevel  06D61DCBBE938971E1EA0C38BD9B5446  1581873200   \n",
       "16   TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1581568465   \n",
       "17   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581908521   \n",
       "18   TopLevel  167115458A0DBDFF7E9C0C53A83BAC9B  1582069091   \n",
       "19   TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1581960451   \n",
       "20   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1582137224   \n",
       "21    Retweet  D3164C7FBCF2565DDF915B1B3AEFB1DC  1582117014   \n",
       "22    Retweet  22C448FF81263D4BAF2A176145EE9EAD  1581589066   \n",
       "23    Retweet  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581888390   \n",
       "24   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581678610   \n",
       "25   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581630135   \n",
       "26   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581941935   \n",
       "27   TopLevel  06D61DCBBE938971E1EA0C38BD9B5446  1581635708   \n",
       "28   TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1581607378   \n",
       "29      Quote  B9175601E87101A984A50F8A62A1C374  1581848711   \n",
       "30   TopLevel  6431A618DCF7F4CB7F62A95A39BAB77A  1581572388   \n",
       "31   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581669392   \n",
       "32   TopLevel  4DC22C3F31C5C43721E6B5815A595ED6  1581604243   \n",
       "33    Retweet  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581617140   \n",
       "34    Retweet  22C448FF81263D4BAF2A176145EE9EAD  1581855437   \n",
       "35    Retweet  22C448FF81263D4BAF2A176145EE9EAD  1581570536   \n",
       "36    Retweet  22C448FF81263D4BAF2A176145EE9EAD  1582000109   \n",
       "37    Retweet  22C448FF81263D4BAF2A176145EE9EAD  1581772220   \n",
       "38   TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1581791115   \n",
       "39   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581975873   \n",
       "\n",
       "                           a_user_id  a_follower_count  a_following_count  \\\n",
       "0   8A9AB92B775C62C4AB60DF6773A01571             13941               1216   \n",
       "1   825B003667DF15692B1704F44DAFD1A2               186                100   \n",
       "2   2DEC09796D0EB473244CB7A4FB13A0A0            250470                  1   \n",
       "3   487BCB8F1677A6D9EEE612940410CBB6               516                406   \n",
       "4   C85C39DC8D279E51DF1BBF0391E5EB2E             19576                273   \n",
       "5   D0A3A1BDAF414BE91D6D482A021614CC             14839               4679   \n",
       "6   89970E5689082BE57C4D3EEEE32C93F1            184674                119   \n",
       "7   66C2B62912A4B5F43CA870F1BE7D9B5D            735185                465   \n",
       "8   0C2C7F0E7FBA8F4EB65DBB58444DF0A7            174082                 76   \n",
       "9   44B7E6CBC9D0431AE935F2FB6DECB791              4167               4351   \n",
       "10  D712F0607FA3F86B1EB405CB7CD1C48B            845016             635502   \n",
       "11  D84B57C9B2077C327D5373C6C7795AE6             10255               7985   \n",
       "12  8C10014037651CB5465005BD522FEFF2           1460017                  0   \n",
       "13  6FF3BD9659F59DFB294E421BC5513CBF               385                404   \n",
       "14  8B5B7C25946593F5F891AF46BE8F43FF             12338                 62   \n",
       "15  993E9663A49A23C484EA7D7272A7F2FF            360326                249   \n",
       "16  C745D6C6DE8C47A539B49378D532A3A4              1074                683   \n",
       "17  16729110BA2AC2C2B188689DC30CCF77              2039               1889   \n",
       "18  C4FB3BD25D02330A357D454F31819744            111701              67138   \n",
       "19  830F0D746C5E837159B4327140DCBF09              1370               3157   \n",
       "20  7D90F1B4EFC10722F425B02F7DED598E               630                473   \n",
       "21  1C19F315F22297759D1198F302C9219A              4111                 96   \n",
       "22  2F3DBE5E912F01E4ED6AA236D5C04F3B               412                271   \n",
       "23  2265A0FF61C8C4C560CFFF5DBDCD59B9            270611               1079   \n",
       "24  6F541AE941159024B739E8871BC28D7E          10422012                381   \n",
       "25  711CEB76A207FF3DDFF55D0F2D67BCD8             63449              49295   \n",
       "26  002700F152911530B4D2583796333AB5             34152                  1   \n",
       "27  5B14F539BC32262C06FEE9D9B1E1B752              1819               1098   \n",
       "28  5A61EDBB0AC0614F57ECCA92C51DBDAA            266403                274   \n",
       "29  AB08EE49347A1B688A92F268FBE14A0A              1248                298   \n",
       "30  C48C45A9C319C5F6317ABAB64CE35FA0              2061                333   \n",
       "31  D6F5E57A33330FEF4F831ABC7E75F341             41778               2986   \n",
       "32  5029E3514B761C00854077B58DF057D0            885777                 18   \n",
       "33  0A84FCF785CD487EE831B7EA757962DD             85941              36581   \n",
       "34  B5E2DD28EF95A0F689D33613838CBDC5           2207882                211   \n",
       "35  B2ADD1F975FEDCEAED8925D8104A5A25               346                368   \n",
       "36  87988FC74B8AC3AA09B22C266E91D5B0               279                366   \n",
       "37  DD5E285BAF3DB6AD7F8469A036C1371B              1017                733   \n",
       "38  6C05D5C53D491D8D7260162B63265C6A               180                273   \n",
       "39  2C3202244A539D79C704197DF064DB7A              7778                147   \n",
       "\n",
       "    a_is_verified  a_account_creation                         b_user_id  \\\n",
       "0           False          1448292186  0000006C3074607050F1339DDCB890BB   \n",
       "1           False          1263078566  000013315386492275CCBF7AEF293EF0   \n",
       "2           False          1356488269  00001569CB28972FC8173122D9DA162F   \n",
       "3           False          1449096567  00001607209C5774DF9207A2AC0EED5F   \n",
       "4            True          1236181798  0000177705514C315F2FC6DFA3872712   \n",
       "5           False          1451365323  00001BC70532632181F17B2A65EFD2BA   \n",
       "6           False          1469218273  00001F56CDCF81D2EF635B3C0EDE57EB   \n",
       "7           False          1388514185  00001F56CDCF81D2EF635B3C0EDE57EB   \n",
       "8           False          1435544008  0000376314CAC0A3E9D4FCF4A29004D6   \n",
       "9           False          1384335793  000043D9A730DF47697D0750F509B56A   \n",
       "10          False          1339918259  00004E42009644A7647E8C988C072D9D   \n",
       "11          False          1416142943  000052B88E27886C4DAEF585806A67BC   \n",
       "12          False          1373488947  00005A197A6EDCF7203D74FE49D1977A   \n",
       "13          False          1482408524  000061D27D4B361292B89ABE39C55B73   \n",
       "14          False          1570718917  000062A7E0F2A45147C9BB597B0EDE89   \n",
       "15          False          1282593651  00006B88F1B1F669C69E886F71FA0562   \n",
       "16          False          1396672637  00006C241A4CC8493E9A2B7A8B8BAFC4   \n",
       "17          False          1411598977  00007598E2C7054842F773BDB77325B1   \n",
       "18          False          1308230601  000076B7463DD70EA33BA32BC7EF2183   \n",
       "19          False          1375455555  00007745A6EE969F1A0F44B10DC17671   \n",
       "20          False          1359141957  00007F3FBC7BCD812E299A58BEE6E85A   \n",
       "21          False          1462695428  00007F3FBC7BCD812E299A58BEE6E85A   \n",
       "22          False          1437576799  0000832375EA08BC57FF1BE30E0DAFC8   \n",
       "23           True          1237854816  0000838F8591DDDCEA1B0E959AE8A20D   \n",
       "24           True          1172656645  0000838F8591DDDCEA1B0E959AE8A20D   \n",
       "25          False          1302883163  000085C3AE598387700C7C7D71FF9022   \n",
       "26          False          1564771226  000085C56613B04BE959892220AC08BD   \n",
       "27          False          1561375451  000087B33602940BAA79A05E9A0C4798   \n",
       "28           True          1494727150  00008D90EE01B655091318240FA2C500   \n",
       "29          False          1572027875  00008E9A2F95C7D0590584CEBF940364   \n",
       "30          False          1405972421  00008E9A2F95C7D0590584CEBF940364   \n",
       "31          False          1230775076  00008EA411ACD86E707FFB0A9BF58C0D   \n",
       "32           True          1321175705  00009E737CA0B3E06851FD81E8EDC780   \n",
       "33          False          1407252198  0000A2FB0CF47CDF6A304DC1915D024F   \n",
       "34           True          1442926734  0000A40A818B1D07A3AD5810F608A302   \n",
       "35          False          1563352662  0000B0CAB9BADF2CC04596E5D543892F   \n",
       "36          False          1506993134  0000B0CAB9BADF2CC04596E5D543892F   \n",
       "37          False          1362153347  0000B0CAB9BADF2CC04596E5D543892F   \n",
       "38          False          1341334739  0000B30E6FAA5EB99D95D57DB1D61EA5   \n",
       "39          False          1384470429  0000B53E908F953FDEED69F83BEA7785   \n",
       "\n",
       "    b_follower_count  b_following_count  b_is_verified  b_account_creation  \\\n",
       "0              27448                600          False          1520948869   \n",
       "1                139                956          False          1335110299   \n",
       "2                 16                 97          False          1503940711   \n",
       "3                460                693          False          1396311956   \n",
       "4                468               3837          False          1298646801   \n",
       "5                118                959          False          1424527389   \n",
       "6                 52                162          False          1397500466   \n",
       "7                 51                161          False          1397500466   \n",
       "8                358                619          False          1265686239   \n",
       "9                924                931          False          1359042454   \n",
       "10              1242               1072          False          1497979462   \n",
       "11              1136               1172          False          1520674893   \n",
       "12                 6                140          False          1556836074   \n",
       "13               147                143          False          1449319908   \n",
       "14              1630               4050          False          1396775852   \n",
       "15               145                259          False          1256517973   \n",
       "16               819               1139          False          1556505243   \n",
       "17                 4                 44          False          1579229979   \n",
       "18               302                287          False          1330007601   \n",
       "19               269                527          False          1252294800   \n",
       "20               674                467          False          1327286025   \n",
       "21               674                467          False          1327286025   \n",
       "22                39                107          False          1545357629   \n",
       "23              1829               1546          False          1342793182   \n",
       "24              1826               1545          False          1342793182   \n",
       "25               826               3128          False          1228574186   \n",
       "26               363                858          False          1425333217   \n",
       "27               992                902          False          1533851297   \n",
       "28               356                351          False          1416386970   \n",
       "29                53                 78          False          1552761597   \n",
       "30                53                 78          False          1552761597   \n",
       "31              1681               2060          False          1236732673   \n",
       "32               305                952          False          1392147575   \n",
       "33              2271               1256          False          1324227365   \n",
       "34                 0                 20          False          1573122106   \n",
       "35               899                657          False          1470487711   \n",
       "36               906                663          False          1470487711   \n",
       "37               906                663          False          1470487711   \n",
       "38               872                931          False          1302779696   \n",
       "39               149                654          False          1414091523   \n",
       "\n",
       "    b_follows_a  \n",
       "0          True  \n",
       "1         False  \n",
       "2         False  \n",
       "3          True  \n",
       "4         False  \n",
       "5         False  \n",
       "6         False  \n",
       "7         False  \n",
       "8         False  \n",
       "9          True  \n",
       "10         True  \n",
       "11         True  \n",
       "12        False  \n",
       "13         True  \n",
       "14        False  \n",
       "15        False  \n",
       "16         True  \n",
       "17        False  \n",
       "18         True  \n",
       "19         True  \n",
       "20         True  \n",
       "21        False  \n",
       "22         True  \n",
       "23        False  \n",
       "24        False  \n",
       "25         True  \n",
       "26        False  \n",
       "27        False  \n",
       "28        False  \n",
       "29        False  \n",
       "30        False  \n",
       "31        False  \n",
       "32        False  \n",
       "33         True  \n",
       "34        False  \n",
       "35         True  \n",
       "36         True  \n",
       "37         True  \n",
       "38         True  \n",
       "39        False  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Column names assigned to `dv`, grouped by where each field comes from.\n",
    "features = [\n",
    "    # --- tweet features ---\n",
    "    'text_tokens', 'hashtags', 'tweet_id', 'media', 'links',\n",
    "    'domains', 'tweet_type', 'language', 'timestamp',\n",
    "    # --- engaged-with user (a_*) features ---\n",
    "    'a_user_id', 'a_follower_count', 'a_following_count',\n",
    "    'a_is_verified', 'a_account_creation',\n",
    "    # --- engaging user (b_*) features ---\n",
    "    'b_user_id', 'b_follower_count', 'b_following_count',\n",
    "    'b_is_verified', 'b_account_creation',\n",
    "    # --- engagement feature ---\n",
    "    'b_follows_a',\n",
    "    # The target names ('reply', 'retweet', 'retweet_comment', 'like')\n",
    "    # are not part of this list.\n",
    "]\n",
    "dv.columns = features\n",
    "gc.collect()\n",
    "\n",
    "# Discard the token column; every other column is kept.\n",
    "dv = dv.drop(columns=['text_tokens'])\n",
    "gc.collect()\n",
    "\n",
    "dv.head(40)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "121386431\n",
      "(121386431, 24) (12434735, 24)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>hashtags</th>\n",
       "      <th>tweet_id</th>\n",
       "      <th>media</th>\n",
       "      <th>links</th>\n",
       "      <th>domains</th>\n",
       "      <th>tweet_type</th>\n",
       "      <th>language</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>a_user_id</th>\n",
       "      <th>a_follower_count</th>\n",
       "      <th>...</th>\n",
       "      <th>b_follower_count</th>\n",
       "      <th>b_following_count</th>\n",
       "      <th>b_is_verified</th>\n",
       "      <th>b_account_creation</th>\n",
       "      <th>b_follows_a</th>\n",
       "      <th>reply</th>\n",
       "      <th>retweet</th>\n",
       "      <th>retweet_comment</th>\n",
       "      <th>like</th>\n",
       "      <th>id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>7647B4E9DAF4C1D8973397DC2A04F3E3</td>\n",
       "      <td>Photo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581703126</td>\n",
       "      <td>8A9AB92B775C62C4AB60DF6773A01571</td>\n",
       "      <td>13941</td>\n",
       "      <td>...</td>\n",
       "      <td>27448</td>\n",
       "      <td>600</td>\n",
       "      <td>False</td>\n",
       "      <td>1520948869</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>121386431</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>408DB1803264B5FF55F73EC06BE9BD77</td>\n",
       "      <td>Photo\\tPhoto\\tPhoto\\tPhoto</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>B9175601E87101A984A50F8A62A1C374</td>\n",
       "      <td>1582021842</td>\n",
       "      <td>825B003667DF15692B1704F44DAFD1A2</td>\n",
       "      <td>186</td>\n",
       "      <td>...</td>\n",
       "      <td>139</td>\n",
       "      <td>956</td>\n",
       "      <td>False</td>\n",
       "      <td>1335110299</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>121386432</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2EE951379C47E8BF62EABB8FA027F753</td>\n",
       "      <td>GIF</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>2996EB2FE8162C076D070A4C8D6532CD</td>\n",
       "      <td>1581734918</td>\n",
       "      <td>2DEC09796D0EB473244CB7A4FB13A0A0</td>\n",
       "      <td>250470</td>\n",
       "      <td>...</td>\n",
       "      <td>16</td>\n",
       "      <td>97</td>\n",
       "      <td>False</td>\n",
       "      <td>1503940711</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>121386433</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2135F24B05DAE3EF213F9CE80FDC6DAF</td>\n",
       "      <td>Photo\\tPhoto\\tPhoto</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581913613</td>\n",
       "      <td>487BCB8F1677A6D9EEE612940410CBB6</td>\n",
       "      <td>516</td>\n",
       "      <td>...</td>\n",
       "      <td>460</td>\n",
       "      <td>693</td>\n",
       "      <td>False</td>\n",
       "      <td>1396311956</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>121386434</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>09143FEDE9BD494A6EA9A7EE160565E3</td>\n",
       "      <td>Photo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581565745</td>\n",
       "      <td>C85C39DC8D279E51DF1BBF0391E5EB2E</td>\n",
       "      <td>19576</td>\n",
       "      <td>...</td>\n",
       "      <td>468</td>\n",
       "      <td>3837</td>\n",
       "      <td>False</td>\n",
       "      <td>1298646801</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>121386435</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "  hashtags                          tweet_id                       media  \\\n",
       "0      NaN  7647B4E9DAF4C1D8973397DC2A04F3E3                       Photo   \n",
       "1      NaN  408DB1803264B5FF55F73EC06BE9BD77  Photo\\tPhoto\\tPhoto\\tPhoto   \n",
       "2      NaN  2EE951379C47E8BF62EABB8FA027F753                         GIF   \n",
       "3      NaN  2135F24B05DAE3EF213F9CE80FDC6DAF         Photo\\tPhoto\\tPhoto   \n",
       "4      NaN  09143FEDE9BD494A6EA9A7EE160565E3                       Photo   \n",
       "\n",
       "  links domains tweet_type                          language   timestamp  \\\n",
       "0   NaN     NaN   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581703126   \n",
       "1   NaN     NaN    Retweet  B9175601E87101A984A50F8A62A1C374  1582021842   \n",
       "2   NaN     NaN   TopLevel  2996EB2FE8162C076D070A4C8D6532CD  1581734918   \n",
       "3   NaN     NaN    Retweet  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581913613   \n",
       "4   NaN     NaN   TopLevel  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581565745   \n",
       "\n",
       "                          a_user_id  a_follower_count  ...  b_follower_count  \\\n",
       "0  8A9AB92B775C62C4AB60DF6773A01571             13941  ...             27448   \n",
       "1  825B003667DF15692B1704F44DAFD1A2               186  ...               139   \n",
       "2  2DEC09796D0EB473244CB7A4FB13A0A0            250470  ...                16   \n",
       "3  487BCB8F1677A6D9EEE612940410CBB6               516  ...               460   \n",
       "4  C85C39DC8D279E51DF1BBF0391E5EB2E             19576  ...               468   \n",
       "\n",
       "   b_following_count  b_is_verified b_account_creation  b_follows_a  reply  \\\n",
       "0                600          False         1520948869         True      0   \n",
       "1                956          False         1335110299        False      0   \n",
       "2                 97          False         1503940711        False      0   \n",
       "3                693          False         1396311956         True      0   \n",
       "4               3837          False         1298646801        False      0   \n",
       "\n",
       "   retweet  retweet_comment  like         id  \n",
       "0        0                0     0  121386431  \n",
       "1        0                0     0  121386432  \n",
       "2        0                0     0  121386433  \n",
       "3        0                0     0  121386434  \n",
       "4        0                0     0  121386435  \n",
       "\n",
       "[5 rows x 24 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Placeholder targets: add the four target columns filled with zeros.\n",
    "for _target in ('reply', 'retweet', 'retweet_comment', 'like'):\n",
    "    dv[_target] = 0\n",
    "\n",
    "# Row ids continue right after the last row of `df`.\n",
    "_first_id = len(df)\n",
    "dv['id'] = np.arange(_first_id, _first_id + len(dv), dtype=np.uint32)\n",
    "print(dv['id'].min())\n",
    "\n",
    "# Store the targets and the numeric columns as unsigned 32-bit integers.\n",
    "for _name in (\n",
    "    'reply', 'retweet', 'retweet_comment', 'like',\n",
    "    'timestamp',\n",
    "    'a_follower_count', 'a_following_count', 'a_account_creation',\n",
    "    'b_follower_count', 'b_following_count', 'b_account_creation',\n",
    "):\n",
    "    dv[_name] = dv[_name].astype(np.uint32)\n",
    "gc.collect()\n",
    "\n",
    "print(df.shape, dv.shape)\n",
    "dv.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(12434838, 20)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read competition_test.tsv: fields are split on '\\x01' and there is no header row.\n",
    "dt = pd.read_csv(\n",
    "    '../input/competition_test.tsv',\n",
    "    sep='\\x01',\n",
    "    header=None,\n",
    "    dtype=DTYPES,\n",
    ")\n",
    "gc.collect()\n",
    "dt.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>hashtags</th>\n",
       "      <th>tweet_id</th>\n",
       "      <th>media</th>\n",
       "      <th>links</th>\n",
       "      <th>domains</th>\n",
       "      <th>tweet_type</th>\n",
       "      <th>language</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>a_user_id</th>\n",
       "      <th>a_follower_count</th>\n",
       "      <th>a_following_count</th>\n",
       "      <th>a_is_verified</th>\n",
       "      <th>a_account_creation</th>\n",
       "      <th>b_user_id</th>\n",
       "      <th>b_follower_count</th>\n",
       "      <th>b_following_count</th>\n",
       "      <th>b_is_verified</th>\n",
       "      <th>b_account_creation</th>\n",
       "      <th>b_follows_a</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>04746004AA1F5498834CE7A4C6343D1A</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581759640</td>\n",
       "      <td>6720CC7830F94CB7465CA283300DB010</td>\n",
       "      <td>119</td>\n",
       "      <td>125</td>\n",
       "      <td>False</td>\n",
       "      <td>1571666822</td>\n",
       "      <td>00000776B07587ECA9717BFC301F2D6E</td>\n",
       "      <td>111</td>\n",
       "      <td>673</td>\n",
       "      <td>False</td>\n",
       "      <td>1478011810</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>024FE90EC2C01B3CDC46A5A90D66B020\\t1B78BDD9C7FF...</td>\n",
       "      <td>B5C4CBE185831F3E5A58A4D81118D4C7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581668217</td>\n",
       "      <td>7DDC67265CFB6E0B4820E0BD0E33A8D3</td>\n",
       "      <td>189</td>\n",
       "      <td>264</td>\n",
       "      <td>False</td>\n",
       "      <td>1575966890</td>\n",
       "      <td>00000776B07587ECA9717BFC301F2D6E</td>\n",
       "      <td>111</td>\n",
       "      <td>673</td>\n",
       "      <td>False</td>\n",
       "      <td>1478011810</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>6B6836351BFAA6D1CC1EB0386BCB8C6A</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1582046459</td>\n",
       "      <td>5456A10C7E4F7A415948EA88BE6845D6</td>\n",
       "      <td>4312</td>\n",
       "      <td>660</td>\n",
       "      <td>False</td>\n",
       "      <td>1494251627</td>\n",
       "      <td>00000B85AAF7DE172876FD96718C4469</td>\n",
       "      <td>1150</td>\n",
       "      <td>48</td>\n",
       "      <td>False</td>\n",
       "      <td>1540395738</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2D09C59493DAC82D8054E79343DFE76A</td>\n",
       "      <td>0DCF558E40500F22F84F98C4E7C38EDC</td>\n",
       "      <td>Photo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>125C57F4FA6D4E110983FB11B52EFD4E</td>\n",
       "      <td>1582083666</td>\n",
       "      <td>9D421C234C7B59A0EDC8D85C847D4569</td>\n",
       "      <td>272</td>\n",
       "      <td>185</td>\n",
       "      <td>False</td>\n",
       "      <td>1559086871</td>\n",
       "      <td>00000E0C9B364891CDE89ECFC54771DE</td>\n",
       "      <td>781</td>\n",
       "      <td>442</td>\n",
       "      <td>False</td>\n",
       "      <td>1432084055</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>A0AD2EB95B7C918A311D9432E9A8FF7A</td>\n",
       "      <td>F13AA57F12DD6107D9D8544A27BDE9EC</td>\n",
       "      <td>Photo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581779241</td>\n",
       "      <td>F63ECD1C7827E767E7C44E9A717056AC</td>\n",
       "      <td>1020</td>\n",
       "      <td>2097</td>\n",
       "      <td>False</td>\n",
       "      <td>1468438879</td>\n",
       "      <td>0000109A57AFA64758EE4AAE2A01BFC7</td>\n",
       "      <td>15</td>\n",
       "      <td>123</td>\n",
       "      <td>False</td>\n",
       "      <td>1385502405</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>9153C7AC0C4F20954C85F75BAD74A7E5\\tD431B857B80B...</td>\n",
       "      <td>01ADD87BE33683487889D721B5DD3910</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581733184</td>\n",
       "      <td>A1252D063C9C6AE6C8BDE545D2EF3A17</td>\n",
       "      <td>796758</td>\n",
       "      <td>873936</td>\n",
       "      <td>True</td>\n",
       "      <td>1348444465</td>\n",
       "      <td>00001607209C5774DF9207A2AC0EED5F</td>\n",
       "      <td>462</td>\n",
       "      <td>694</td>\n",
       "      <td>False</td>\n",
       "      <td>1396311956</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>NaN</td>\n",
       "      <td>5E35903C153D1CA5503B8AD186A652F3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0A07DC86E990CF6D15A8C06150FE75D2</td>\n",
       "      <td>3896E26D12C903F0A00B6B1BE9A9BEA3</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1582023785</td>\n",
       "      <td>70AAF2A997947D596A573D439E0634C8</td>\n",
       "      <td>178</td>\n",
       "      <td>372</td>\n",
       "      <td>False</td>\n",
       "      <td>1427756246</td>\n",
       "      <td>00001607209C5774DF9207A2AC0EED5F</td>\n",
       "      <td>460</td>\n",
       "      <td>693</td>\n",
       "      <td>False</td>\n",
       "      <td>1396311956</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>54DD89D0466F17FCA84238436953A625</td>\n",
       "      <td>7B5C160AE47825DD4DABFEE9C99B8D26</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581737171</td>\n",
       "      <td>487BCB8F1677A6D9EEE612940410CBB6</td>\n",
       "      <td>517</td>\n",
       "      <td>407</td>\n",
       "      <td>False</td>\n",
       "      <td>1449096567</td>\n",
       "      <td>00001607209C5774DF9207A2AC0EED5F</td>\n",
       "      <td>462</td>\n",
       "      <td>694</td>\n",
       "      <td>False</td>\n",
       "      <td>1396311956</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4E5E35BB70F2076B53DF8F8AD5D07DBF</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581560684</td>\n",
       "      <td>AC37125C0F059E186807770C71B921C4</td>\n",
       "      <td>6520</td>\n",
       "      <td>1006</td>\n",
       "      <td>False</td>\n",
       "      <td>1306929769</td>\n",
       "      <td>00002E504FA95B3BC259937F4F6E9E7E</td>\n",
       "      <td>2520</td>\n",
       "      <td>1483</td>\n",
       "      <td>False</td>\n",
       "      <td>1542253460</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>7037FABD43783ED77090542722AFDA23\\t418DF39270AD...</td>\n",
       "      <td>686E12F43EE6BE0638B771248C9C2FE7</td>\n",
       "      <td>Photo</td>\n",
       "      <td>EA7D3E05A8C621A38C19DFF2E990B333</td>\n",
       "      <td>6D323BE93766E79BE423FAC5C28BE39B</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581948274</td>\n",
       "      <td>8E4C938C743BBD9E27B5CE2CF7DBEA97</td>\n",
       "      <td>761672</td>\n",
       "      <td>27</td>\n",
       "      <td>True</td>\n",
       "      <td>1522739809</td>\n",
       "      <td>00002EBA27A6215AD7FCB28362CB17D2</td>\n",
       "      <td>71</td>\n",
       "      <td>479</td>\n",
       "      <td>False</td>\n",
       "      <td>1576184441</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                            hashtags  \\\n",
       "0                                                NaN   \n",
       "1  024FE90EC2C01B3CDC46A5A90D66B020\\t1B78BDD9C7FF...   \n",
       "2                                                NaN   \n",
       "3                   2D09C59493DAC82D8054E79343DFE76A   \n",
       "4                   A0AD2EB95B7C918A311D9432E9A8FF7A   \n",
       "5  9153C7AC0C4F20954C85F75BAD74A7E5\\tD431B857B80B...   \n",
       "6                                                NaN   \n",
       "7                   54DD89D0466F17FCA84238436953A625   \n",
       "8                                                NaN   \n",
       "9  7037FABD43783ED77090542722AFDA23\\t418DF39270AD...   \n",
       "\n",
       "                           tweet_id  media                             links  \\\n",
       "0  04746004AA1F5498834CE7A4C6343D1A    NaN                               NaN   \n",
       "1  B5C4CBE185831F3E5A58A4D81118D4C7    NaN                               NaN   \n",
       "2  6B6836351BFAA6D1CC1EB0386BCB8C6A  Video                               NaN   \n",
       "3  0DCF558E40500F22F84F98C4E7C38EDC  Photo                               NaN   \n",
       "4  F13AA57F12DD6107D9D8544A27BDE9EC  Photo                               NaN   \n",
       "5  01ADD87BE33683487889D721B5DD3910    NaN                               NaN   \n",
       "6  5E35903C153D1CA5503B8AD186A652F3    NaN  0A07DC86E990CF6D15A8C06150FE75D2   \n",
       "7  7B5C160AE47825DD4DABFEE9C99B8D26  Video                               NaN   \n",
       "8  4E5E35BB70F2076B53DF8F8AD5D07DBF  Video                               NaN   \n",
       "9  686E12F43EE6BE0638B771248C9C2FE7  Photo  EA7D3E05A8C621A38C19DFF2E990B333   \n",
       "\n",
       "                            domains tweet_type  \\\n",
       "0                               NaN   TopLevel   \n",
       "1                               NaN    Retweet   \n",
       "2                               NaN    Retweet   \n",
       "3                               NaN    Retweet   \n",
       "4                               NaN    Retweet   \n",
       "5                               NaN    Retweet   \n",
       "6  3896E26D12C903F0A00B6B1BE9A9BEA3   TopLevel   \n",
       "7                               NaN    Retweet   \n",
       "8                               NaN   TopLevel   \n",
       "9  6D323BE93766E79BE423FAC5C28BE39B   TopLevel   \n",
       "\n",
       "                           language   timestamp  \\\n",
       "0  22C448FF81263D4BAF2A176145EE9EAD  1581759640   \n",
       "1  22C448FF81263D4BAF2A176145EE9EAD  1581668217   \n",
       "2  D3164C7FBCF2565DDF915B1B3AEFB1DC  1582046459   \n",
       "3  125C57F4FA6D4E110983FB11B52EFD4E  1582083666   \n",
       "4  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581779241   \n",
       "5  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581733184   \n",
       "6  D3164C7FBCF2565DDF915B1B3AEFB1DC  1582023785   \n",
       "7  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581737171   \n",
       "8  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581560684   \n",
       "9  22C448FF81263D4BAF2A176145EE9EAD  1581948274   \n",
       "\n",
       "                          a_user_id  a_follower_count  a_following_count  \\\n",
       "0  6720CC7830F94CB7465CA283300DB010               119                125   \n",
       "1  7DDC67265CFB6E0B4820E0BD0E33A8D3               189                264   \n",
       "2  5456A10C7E4F7A415948EA88BE6845D6              4312                660   \n",
       "3  9D421C234C7B59A0EDC8D85C847D4569               272                185   \n",
       "4  F63ECD1C7827E767E7C44E9A717056AC              1020               2097   \n",
       "5  A1252D063C9C6AE6C8BDE545D2EF3A17            796758             873936   \n",
       "6  70AAF2A997947D596A573D439E0634C8               178                372   \n",
       "7  487BCB8F1677A6D9EEE612940410CBB6               517                407   \n",
       "8  AC37125C0F059E186807770C71B921C4              6520               1006   \n",
       "9  8E4C938C743BBD9E27B5CE2CF7DBEA97            761672                 27   \n",
       "\n",
       "   a_is_verified  a_account_creation                         b_user_id  \\\n",
       "0          False          1571666822  00000776B07587ECA9717BFC301F2D6E   \n",
       "1          False          1575966890  00000776B07587ECA9717BFC301F2D6E   \n",
       "2          False          1494251627  00000B85AAF7DE172876FD96718C4469   \n",
       "3          False          1559086871  00000E0C9B364891CDE89ECFC54771DE   \n",
       "4          False          1468438879  0000109A57AFA64758EE4AAE2A01BFC7   \n",
       "5           True          1348444465  00001607209C5774DF9207A2AC0EED5F   \n",
       "6          False          1427756246  00001607209C5774DF9207A2AC0EED5F   \n",
       "7          False          1449096567  00001607209C5774DF9207A2AC0EED5F   \n",
       "8          False          1306929769  00002E504FA95B3BC259937F4F6E9E7E   \n",
       "9           True          1522739809  00002EBA27A6215AD7FCB28362CB17D2   \n",
       "\n",
       "   b_follower_count  b_following_count  b_is_verified  b_account_creation  \\\n",
       "0               111                673          False          1478011810   \n",
       "1               111                673          False          1478011810   \n",
       "2              1150                 48          False          1540395738   \n",
       "3               781                442          False          1432084055   \n",
       "4                15                123          False          1385502405   \n",
       "5               462                694          False          1396311956   \n",
       "6               460                693          False          1396311956   \n",
       "7               462                694          False          1396311956   \n",
       "8              2520               1483          False          1542253460   \n",
       "9                71                479          False          1576184441   \n",
       "\n",
       "   b_follows_a  \n",
       "0         True  \n",
       "1         True  \n",
       "2         True  \n",
       "3         True  \n",
       "4        False  \n",
       "5         True  \n",
       "6         True  \n",
       "7         True  \n",
       "8         True  \n",
       "9        False  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dt.columns = features\n",
    "gc.collect()\n",
    "\n",
    "dt = dt.drop('text_tokens', axis=1)\n",
    "gc.collect()\n",
    "\n",
    "dt.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "133821166\n",
      "(121386431, 24) (12434735, 24) (12434838, 24)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>hashtags</th>\n",
       "      <th>tweet_id</th>\n",
       "      <th>media</th>\n",
       "      <th>links</th>\n",
       "      <th>domains</th>\n",
       "      <th>tweet_type</th>\n",
       "      <th>language</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>a_user_id</th>\n",
       "      <th>a_follower_count</th>\n",
       "      <th>...</th>\n",
       "      <th>b_follower_count</th>\n",
       "      <th>b_following_count</th>\n",
       "      <th>b_is_verified</th>\n",
       "      <th>b_account_creation</th>\n",
       "      <th>b_follows_a</th>\n",
       "      <th>reply</th>\n",
       "      <th>retweet</th>\n",
       "      <th>retweet_comment</th>\n",
       "      <th>like</th>\n",
       "      <th>id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>04746004AA1F5498834CE7A4C6343D1A</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581759640</td>\n",
       "      <td>6720CC7830F94CB7465CA283300DB010</td>\n",
       "      <td>119</td>\n",
       "      <td>...</td>\n",
       "      <td>111</td>\n",
       "      <td>673</td>\n",
       "      <td>False</td>\n",
       "      <td>1478011810</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>133821166</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>024FE90EC2C01B3CDC46A5A90D66B020\\t1B78BDD9C7FF...</td>\n",
       "      <td>B5C4CBE185831F3E5A58A4D81118D4C7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581668217</td>\n",
       "      <td>7DDC67265CFB6E0B4820E0BD0E33A8D3</td>\n",
       "      <td>189</td>\n",
       "      <td>...</td>\n",
       "      <td>111</td>\n",
       "      <td>673</td>\n",
       "      <td>False</td>\n",
       "      <td>1478011810</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>133821167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>6B6836351BFAA6D1CC1EB0386BCB8C6A</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1582046459</td>\n",
       "      <td>5456A10C7E4F7A415948EA88BE6845D6</td>\n",
       "      <td>4312</td>\n",
       "      <td>...</td>\n",
       "      <td>1150</td>\n",
       "      <td>48</td>\n",
       "      <td>False</td>\n",
       "      <td>1540395738</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>133821168</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2D09C59493DAC82D8054E79343DFE76A</td>\n",
       "      <td>0DCF558E40500F22F84F98C4E7C38EDC</td>\n",
       "      <td>Photo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>125C57F4FA6D4E110983FB11B52EFD4E</td>\n",
       "      <td>1582083666</td>\n",
       "      <td>9D421C234C7B59A0EDC8D85C847D4569</td>\n",
       "      <td>272</td>\n",
       "      <td>...</td>\n",
       "      <td>781</td>\n",
       "      <td>442</td>\n",
       "      <td>False</td>\n",
       "      <td>1432084055</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>133821169</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>A0AD2EB95B7C918A311D9432E9A8FF7A</td>\n",
       "      <td>F13AA57F12DD6107D9D8544A27BDE9EC</td>\n",
       "      <td>Photo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581779241</td>\n",
       "      <td>F63ECD1C7827E767E7C44E9A717056AC</td>\n",
       "      <td>1020</td>\n",
       "      <td>...</td>\n",
       "      <td>15</td>\n",
       "      <td>123</td>\n",
       "      <td>False</td>\n",
       "      <td>1385502405</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>133821170</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                            hashtags  \\\n",
       "0                                                NaN   \n",
       "1  024FE90EC2C01B3CDC46A5A90D66B020\\t1B78BDD9C7FF...   \n",
       "2                                                NaN   \n",
       "3                   2D09C59493DAC82D8054E79343DFE76A   \n",
       "4                   A0AD2EB95B7C918A311D9432E9A8FF7A   \n",
       "\n",
       "                           tweet_id  media links domains tweet_type  \\\n",
       "0  04746004AA1F5498834CE7A4C6343D1A    NaN   NaN     NaN   TopLevel   \n",
       "1  B5C4CBE185831F3E5A58A4D81118D4C7    NaN   NaN     NaN    Retweet   \n",
       "2  6B6836351BFAA6D1CC1EB0386BCB8C6A  Video   NaN     NaN    Retweet   \n",
       "3  0DCF558E40500F22F84F98C4E7C38EDC  Photo   NaN     NaN    Retweet   \n",
       "4  F13AA57F12DD6107D9D8544A27BDE9EC  Photo   NaN     NaN    Retweet   \n",
       "\n",
       "                           language   timestamp  \\\n",
       "0  22C448FF81263D4BAF2A176145EE9EAD  1581759640   \n",
       "1  22C448FF81263D4BAF2A176145EE9EAD  1581668217   \n",
       "2  D3164C7FBCF2565DDF915B1B3AEFB1DC  1582046459   \n",
       "3  125C57F4FA6D4E110983FB11B52EFD4E  1582083666   \n",
       "4  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581779241   \n",
       "\n",
       "                          a_user_id  a_follower_count  ...  b_follower_count  \\\n",
       "0  6720CC7830F94CB7465CA283300DB010               119  ...               111   \n",
       "1  7DDC67265CFB6E0B4820E0BD0E33A8D3               189  ...               111   \n",
       "2  5456A10C7E4F7A415948EA88BE6845D6              4312  ...              1150   \n",
       "3  9D421C234C7B59A0EDC8D85C847D4569               272  ...               781   \n",
       "4  F63ECD1C7827E767E7C44E9A717056AC              1020  ...                15   \n",
       "\n",
       "   b_following_count  b_is_verified b_account_creation  b_follows_a  reply  \\\n",
       "0                673          False         1478011810         True      0   \n",
       "1                673          False         1478011810         True      0   \n",
       "2                 48          False         1540395738         True      0   \n",
       "3                442          False         1432084055         True      0   \n",
       "4                123          False         1385502405        False      0   \n",
       "\n",
       "   retweet  retweet_comment  like         id  \n",
       "0        0                0     0  133821166  \n",
       "1        0                0     0  133821167  \n",
       "2        0                0     0  133821168  \n",
       "3        0                0     0  133821169  \n",
       "4        0                0     0  133821170  \n",
       "\n",
       "[5 rows x 24 columns]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dt['reply']           = 0\n",
    "dt['retweet']         = 0\n",
    "dt['retweet_comment'] = 0\n",
    "dt['like']            = 0\n",
    "\n",
    "dt['id']   = np.arange( df.shape[0]+dv.shape[0] , df.shape[0]+dv.shape[0]+dt.shape[0] )\n",
    "dt['id']   = dt['id'].astype(np.uint32)\n",
    "print( dt['id'].min() )\n",
    "\n",
    "dt['reply']           = dt['reply'].astype( np.uint32 )\n",
    "dt['retweet']         = dt['retweet'].astype( np.uint32 )\n",
    "dt['retweet_comment'] = dt['retweet_comment'].astype( np.uint32 )\n",
    "dt['like']            = dt['like'].astype( np.uint32 )\n",
    "\n",
    "dt['timestamp']         = dt['timestamp'].astype( np.uint32 )\n",
    "dt['a_follower_count']  = dt['a_follower_count'].astype( np.uint32 )\n",
    "dt['a_following_count'] = dt['a_following_count'].astype( np.uint32 )\n",
    "dt['a_account_creation']= dt['a_account_creation'].astype( np.uint32 )\n",
    "dt['b_follower_count']  = dt['b_follower_count'].astype( np.uint32 )\n",
    "dt['b_following_count'] = dt['b_following_count'].astype( np.uint32 )\n",
    "dt['b_account_creation']= dt['b_account_creation'].astype( np.uint32 )\n",
    "gc.collect()\n",
    "\n",
    "print(df.shape,dv.shape,dt.shape)\n",
    "dt.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "121386431 12434735 12434838\n"
     ]
    }
   ],
   "source": [
    "train_size = df.shape[0]\n",
    "test0_size = dv.shape[0]\n",
    "test1_size = dt.shape[0]\n",
    "print(train_size,test0_size,test1_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(146256004, 24)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gc.collect()\n",
    "df = pd.concat( (df,dv,dt), sort=False )\n",
    "gc.collect()\n",
    "del dv, dt\n",
    "gc.collect()\n",
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 146256004 entries, 0 to 12434837\n",
      "Data columns (total 24 columns):\n",
      " #   Column              Dtype \n",
      "---  ------              ----- \n",
      " 0   hashtags            object\n",
      " 1   tweet_id            object\n",
      " 2   media               object\n",
      " 3   links               object\n",
      " 4   domains             object\n",
      " 5   tweet_type          object\n",
      " 6   language            object\n",
      " 7   timestamp           uint32\n",
      " 8   a_user_id           object\n",
      " 9   a_follower_count    uint32\n",
      " 10  a_following_count   uint32\n",
      " 11  a_is_verified       bool  \n",
      " 12  a_account_creation  uint32\n",
      " 13  b_user_id           object\n",
      " 14  b_follower_count    uint32\n",
      " 15  b_following_count   uint32\n",
      " 16  b_is_verified       bool  \n",
      " 17  b_account_creation  uint32\n",
      " 18  b_follows_a         bool  \n",
      " 19  reply               uint32\n",
      " 20  retweet             uint32\n",
      " 21  retweet_comment     uint32\n",
      " 22  like                uint32\n",
      " 23  id                  uint32\n",
      "dtypes: bool(3), object(9), uint32(12)\n",
      "memory usage: 17.8+ GB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>hashtags</th>\n",
       "      <th>tweet_id</th>\n",
       "      <th>media</th>\n",
       "      <th>links</th>\n",
       "      <th>domains</th>\n",
       "      <th>tweet_type</th>\n",
       "      <th>language</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>a_user_id</th>\n",
       "      <th>a_follower_count</th>\n",
       "      <th>...</th>\n",
       "      <th>b_follower_count</th>\n",
       "      <th>b_following_count</th>\n",
       "      <th>b_is_verified</th>\n",
       "      <th>b_account_creation</th>\n",
       "      <th>b_follows_a</th>\n",
       "      <th>reply</th>\n",
       "      <th>retweet</th>\n",
       "      <th>retweet_comment</th>\n",
       "      <th>like</th>\n",
       "      <th>id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>12434828</th>\n",
       "      <td>D958F80A2C524231CDDCB70D2CFB12D6</td>\n",
       "      <td>E453FEE546052D4B6E8A4E4C30E705A2</td>\n",
       "      <td>Photo</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581787888</td>\n",
       "      <td>BC6F9AE8018419E839DD22BB0523F91A</td>\n",
       "      <td>195013</td>\n",
       "      <td>...</td>\n",
       "      <td>384</td>\n",
       "      <td>771</td>\n",
       "      <td>False</td>\n",
       "      <td>1491543663</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255994</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434829</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1F76F9B74256705EAF27F0BA4C17062A</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581753911</td>\n",
       "      <td>257F9B3CB261630C055BC9E2BA06573E</td>\n",
       "      <td>246</td>\n",
       "      <td>...</td>\n",
       "      <td>647</td>\n",
       "      <td>691</td>\n",
       "      <td>False</td>\n",
       "      <td>1470923518</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255995</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434830</th>\n",
       "      <td>NaN</td>\n",
       "      <td>5A8FED87D1101020082BE134F20602FC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1581892415</td>\n",
       "      <td>15AAA5DE6C94DEBA61E6D38EDE554315</td>\n",
       "      <td>1434</td>\n",
       "      <td>...</td>\n",
       "      <td>3148</td>\n",
       "      <td>1851</td>\n",
       "      <td>False</td>\n",
       "      <td>1410580297</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255996</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434831</th>\n",
       "      <td>NaN</td>\n",
       "      <td>E90CEBC48FABA0E93714D254685DF58E</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Quote</td>\n",
       "      <td>9BF3403E0EB7EA8A256DA9019C0B0716</td>\n",
       "      <td>1581832941</td>\n",
       "      <td>892CA934A1A587E03DDD6BA233763DBC</td>\n",
       "      <td>606</td>\n",
       "      <td>...</td>\n",
       "      <td>85</td>\n",
       "      <td>160</td>\n",
       "      <td>False</td>\n",
       "      <td>1559838543</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255997</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434832</th>\n",
       "      <td>NaN</td>\n",
       "      <td>49D06B7B25F2891542A1692AEE5EDA27</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Quote</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1582001186</td>\n",
       "      <td>0908B0B4B30F39B392D348A3D0B5B883</td>\n",
       "      <td>71562</td>\n",
       "      <td>...</td>\n",
       "      <td>145</td>\n",
       "      <td>289</td>\n",
       "      <td>False</td>\n",
       "      <td>1466966626</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434833</th>\n",
       "      <td>NaN</td>\n",
       "      <td>44DBF621793D97030A0ACD60C083CECB</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>ECED8A16BE2A5E8871FD55F4842F16B1</td>\n",
       "      <td>1582081341</td>\n",
       "      <td>87A04067580BDC47389F7019EE5ACE63</td>\n",
       "      <td>5390</td>\n",
       "      <td>...</td>\n",
       "      <td>56</td>\n",
       "      <td>267</td>\n",
       "      <td>False</td>\n",
       "      <td>1428007228</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146255999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434834</th>\n",
       "      <td>NaN</td>\n",
       "      <td>CB7E305F339C8C604BADD29AD0B4A276</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>D3164C7FBCF2565DDF915B1B3AEFB1DC</td>\n",
       "      <td>1582091134</td>\n",
       "      <td>004939ED381AE2DCFFC735AE48425342</td>\n",
       "      <td>17747</td>\n",
       "      <td>...</td>\n",
       "      <td>1837</td>\n",
       "      <td>129</td>\n",
       "      <td>False</td>\n",
       "      <td>1272381192</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146256000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434835</th>\n",
       "      <td>NaN</td>\n",
       "      <td>B681F1137F24971FA9C153CC256212F9</td>\n",
       "      <td>Video</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Retweet</td>\n",
       "      <td>125C57F4FA6D4E110983FB11B52EFD4E</td>\n",
       "      <td>1582086464</td>\n",
       "      <td>8715B7C84D2E77AC31EB5B0BAF8FB0D3</td>\n",
       "      <td>4386</td>\n",
       "      <td>...</td>\n",
       "      <td>8</td>\n",
       "      <td>57</td>\n",
       "      <td>False</td>\n",
       "      <td>1235182992</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146256001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434836</th>\n",
       "      <td>1F6D05BCF068D59C3CDC1F935C6DB93D</td>\n",
       "      <td>35987A6210FA9198459C3BAC3E7F4BB7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581665518</td>\n",
       "      <td>F550D42637822CD1027AAB3B67B36602</td>\n",
       "      <td>4236</td>\n",
       "      <td>...</td>\n",
       "      <td>717</td>\n",
       "      <td>464</td>\n",
       "      <td>False</td>\n",
       "      <td>1501554925</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146256002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12434837</th>\n",
       "      <td>345CF3DCA1963FF4E6D791CE0A78B703\\t1E956C863E26...</td>\n",
       "      <td>42D76062815ED12928B70CFB8E8F1964</td>\n",
       "      <td>Photo\\tPhoto\\tPhoto\\tPhoto</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TopLevel</td>\n",
       "      <td>22C448FF81263D4BAF2A176145EE9EAD</td>\n",
       "      <td>1581799075</td>\n",
       "      <td>97611ED8757F1743C62778AB10A4A28C</td>\n",
       "      <td>4354</td>\n",
       "      <td>...</td>\n",
       "      <td>717</td>\n",
       "      <td>464</td>\n",
       "      <td>False</td>\n",
       "      <td>1501554925</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>146256003</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                   hashtags  \\\n",
       "12434828                   D958F80A2C524231CDDCB70D2CFB12D6   \n",
       "12434829                                                NaN   \n",
       "12434830                                                NaN   \n",
       "12434831                                                NaN   \n",
       "12434832                                                NaN   \n",
       "12434833                                                NaN   \n",
       "12434834                                                NaN   \n",
       "12434835                                                NaN   \n",
       "12434836                   1F6D05BCF068D59C3CDC1F935C6DB93D   \n",
       "12434837  345CF3DCA1963FF4E6D791CE0A78B703\\t1E956C863E26...   \n",
       "\n",
       "                                  tweet_id                       media links  \\\n",
       "12434828  E453FEE546052D4B6E8A4E4C30E705A2                       Photo   NaN   \n",
       "12434829  1F76F9B74256705EAF27F0BA4C17062A                       Video   NaN   \n",
       "12434830  5A8FED87D1101020082BE134F20602FC                         NaN   NaN   \n",
       "12434831  E90CEBC48FABA0E93714D254685DF58E                         NaN   NaN   \n",
       "12434832  49D06B7B25F2891542A1692AEE5EDA27                         NaN   NaN   \n",
       "12434833  44DBF621793D97030A0ACD60C083CECB                         NaN   NaN   \n",
       "12434834  CB7E305F339C8C604BADD29AD0B4A276                         NaN   NaN   \n",
       "12434835  B681F1137F24971FA9C153CC256212F9                       Video   NaN   \n",
       "12434836  35987A6210FA9198459C3BAC3E7F4BB7                         NaN   NaN   \n",
       "12434837  42D76062815ED12928B70CFB8E8F1964  Photo\\tPhoto\\tPhoto\\tPhoto   NaN   \n",
       "\n",
       "         domains tweet_type                          language   timestamp  \\\n",
       "12434828     NaN    Retweet  22C448FF81263D4BAF2A176145EE9EAD  1581787888   \n",
       "12434829     NaN    Retweet  22C448FF81263D4BAF2A176145EE9EAD  1581753911   \n",
       "12434830     NaN    Retweet  D3164C7FBCF2565DDF915B1B3AEFB1DC  1581892415   \n",
       "12434831     NaN      Quote  9BF3403E0EB7EA8A256DA9019C0B0716  1581832941   \n",
       "12434832     NaN      Quote  D3164C7FBCF2565DDF915B1B3AEFB1DC  1582001186   \n",
       "12434833     NaN    Retweet  ECED8A16BE2A5E8871FD55F4842F16B1  1582081341   \n",
       "12434834     NaN    Retweet  D3164C7FBCF2565DDF915B1B3AEFB1DC  1582091134   \n",
       "12434835     NaN    Retweet  125C57F4FA6D4E110983FB11B52EFD4E  1582086464   \n",
       "12434836     NaN   TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1581665518   \n",
       "12434837     NaN   TopLevel  22C448FF81263D4BAF2A176145EE9EAD  1581799075   \n",
       "\n",
       "                                 a_user_id  a_follower_count  ...  \\\n",
       "12434828  BC6F9AE8018419E839DD22BB0523F91A            195013  ...   \n",
       "12434829  257F9B3CB261630C055BC9E2BA06573E               246  ...   \n",
       "12434830  15AAA5DE6C94DEBA61E6D38EDE554315              1434  ...   \n",
       "12434831  892CA934A1A587E03DDD6BA233763DBC               606  ...   \n",
       "12434832  0908B0B4B30F39B392D348A3D0B5B883             71562  ...   \n",
       "12434833  87A04067580BDC47389F7019EE5ACE63              5390  ...   \n",
       "12434834  004939ED381AE2DCFFC735AE48425342             17747  ...   \n",
       "12434835  8715B7C84D2E77AC31EB5B0BAF8FB0D3              4386  ...   \n",
       "12434836  F550D42637822CD1027AAB3B67B36602              4236  ...   \n",
       "12434837  97611ED8757F1743C62778AB10A4A28C              4354  ...   \n",
       "\n",
       "          b_follower_count  b_following_count  b_is_verified  \\\n",
       "12434828               384                771          False   \n",
       "12434829               647                691          False   \n",
       "12434830              3148               1851          False   \n",
       "12434831                85                160          False   \n",
       "12434832               145                289          False   \n",
       "12434833                56                267          False   \n",
       "12434834              1837                129          False   \n",
       "12434835                 8                 57          False   \n",
       "12434836               717                464          False   \n",
       "12434837               717                464          False   \n",
       "\n",
       "         b_account_creation  b_follows_a  reply  retweet  retweet_comment  \\\n",
       "12434828         1491543663        False      0        0                0   \n",
       "12434829         1470923518         True      0        0                0   \n",
       "12434830         1410580297        False      0        0                0   \n",
       "12434831         1559838543         True      0        0                0   \n",
       "12434832         1466966626        False      0        0                0   \n",
       "12434833         1428007228        False      0        0                0   \n",
       "12434834         1272381192        False      0        0                0   \n",
       "12434835         1235182992        False      0        0                0   \n",
       "12434836         1501554925         True      0        0                0   \n",
       "12434837         1501554925         True      0        0                0   \n",
       "\n",
       "          like         id  \n",
       "12434828     0  146255994  \n",
       "12434829     0  146255995  \n",
       "12434830     0  146255996  \n",
       "12434831     0  146255997  \n",
       "12434832     0  146255998  \n",
       "12434833     0  146255999  \n",
       "12434834     0  146256000  \n",
       "12434835     0  146256001  \n",
       "12434836     0  146256002  \n",
       "12434837     0  146256003  \n",
       "\n",
       "[10 rows x 24 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the last 10 rows of the frame.\n",
    "df.tail( n=10 )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Collapse the tab-separated media list to at most its first two entries, joined by '_'\n",
    "# (e.g. 'Photo\\tPhoto\\tPhoto' -> 'Photo_Photo'); rows without media become ''.\n",
    "df['media'] = df['media'].fillna( '' ).apply( lambda x: '_'.join( x.split('\\t')[:2] ) )\n",
    "gc.collect()\n",
    "\n",
    "# Label-encode the low-cardinality columns as uint8. sort=True assigns the codes in sorted\n",
    "# category order instead of order of first appearance.\n",
    "for col in ( 'language', 'tweet_type', 'media' ):\n",
    "    codes, uniques = pd.factorize( df[col], sort=True )\n",
    "    # factorize marks missing values with -1 and uint8 only holds 0..255; the cast below would\n",
    "    # silently wrap such codes around, so stop here instead of corrupting the column.\n",
    "    assert codes.size == 0 or codes.min() >= 0, f'{col}: missing values cannot be encoded as uint8'\n",
    "    assert len( uniques ) <= 256, f'{col}: {len( uniques )} categories do not fit in uint8'\n",
    "    df[col] = codes.astype( np.uint8 )\n",
    "    del codes, uniques\n",
    "    gc.collect()\n",
    "\n",
    "gc.collect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
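    "# Reference only (kept as comments): labels behind the integer codes that the sorted\n",
    "# factorize step assigns to the `media` and `tweet_type` columns.\n",
    "# media = {\n",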
    "# 0:'',\n",
    "# 1:'GIF',\n",
    "# 2:'GIF_GIF',\n",
    "# 3:'GIF_Photo',\n",
    "# 4:'GIF_Video',\n",
    "# 5:'Photo',\n",
    "# 6:'Photo_GIF',\n",
    "# 7:'Photo_Photo',\n",
    "# 8:'Photo_Video',\n",
    "# 9:'Video',\n",
    "# 10:'Video_GIF',\n",
    "# 11:'Video_Photo',\n",
    "# 12:'Video_Video'\n",
    "# }\n",
    "# TT = {0:'Quote', 1:'Retweet', 2:'TopLevel'}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>hashtags</th>\n",
       "      <th>tweet_id</th>\n",
       "      <th>media</th>\n",
       "      <th>links</th>\n",
       "      <th>domains</th>\n",
       "      <th>tweet_type</th>\n",
       "      <th>language</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>a_user_id</th>\n",
       "      <th>a_follower_count</th>\n",
       "      <th>...</th>\n",
       "      <th>b_follower_count</th>\n",
       "      <th>b_following_count</th>\n",
       "      <th>b_is_verified</th>\n",
       "      <th>b_account_creation</th>\n",
       "      <th>b_follows_a</th>\n",
       "      <th>reply</th>\n",
       "      <th>retweet</th>\n",
       "      <th>retweet_comment</th>\n",
       "      <th>like</th>\n",
       "      <th>id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>E7D6C5094767223F6F8789A87A1937AB</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>11</td>\n",
       "      <td>1581262691</td>\n",
       "      <td>D557B03872EF8986F7F4426AE094B2FE</td>\n",
       "      <td>986</td>\n",
       "      <td>...</td>\n",
       "      <td>94</td>\n",
       "      <td>648</td>\n",
       "      <td>False</td>\n",
       "      <td>1478011810</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>83D6C79F5FCEC8D1CAD9E82C2C261611\\tFFAD2DCF664C...</td>\n",
       "      <td>129F4A868712BA2B98D31AF98C3066E4</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>11</td>\n",
       "      <td>1581497241</td>\n",
       "      <td>424822AC982CE0E8965506C63B44EC12</td>\n",
       "      <td>1225</td>\n",
       "      <td>...</td>\n",
       "      <td>1139</td>\n",
       "      <td>46</td>\n",
       "      <td>False</td>\n",
       "      <td>1540395738</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>1581497559</td>\n",
       "      <td>0</td>\n",
       "      <td>1581497622</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>04C6C2175852CDBBC23B2446C7E7C22D</td>\n",
       "      <td>0</td>\n",
       "      <td>DDFFB4C01DB85921C3580F614575AA6D</td>\n",
       "      <td>BE4539C53C53FFABCFD232DB100C792B</td>\n",
       "      <td>2</td>\n",
       "      <td>11</td>\n",
       "      <td>1580978528</td>\n",
       "      <td>1EC14E26417AA926095530AC591BA9CE</td>\n",
       "      <td>3016</td>\n",
       "      <td>...</td>\n",
       "      <td>780</td>\n",
       "      <td>440</td>\n",
       "      <td>False</td>\n",
       "      <td>1432084055</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1581060554</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>168157826315514C120494D4DF8E6216</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>54</td>\n",
       "      <td>1581321849</td>\n",
       "      <td>9B9595B6FEB8948BDDF0D222F27E0118</td>\n",
       "      <td>2121</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>45</td>\n",
       "      <td>False</td>\n",
       "      <td>1534313747</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1581328518</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>B3E3673782A69D9D8A45D3B222F0B073</td>\n",
       "      <td>5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>11</td>\n",
       "      <td>1580956787</td>\n",
       "      <td>525DC99B7CB8F1AC4AD3E66C53FA38E0</td>\n",
       "      <td>813505</td>\n",
       "      <td>...</td>\n",
       "      <td>171</td>\n",
       "      <td>388</td>\n",
       "      <td>False</td>\n",
       "      <td>1490166885</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1580957807</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                            hashtags  \\\n",
       "0                                                NaN   \n",
       "1  83D6C79F5FCEC8D1CAD9E82C2C261611\\tFFAD2DCF664C...   \n",
       "2                                                NaN   \n",
       "3                                                NaN   \n",
       "4                                                NaN   \n",
       "\n",
       "                           tweet_id  media                             links  \\\n",
       "0  E7D6C5094767223F6F8789A87A1937AB      0                               NaN   \n",
       "1  129F4A868712BA2B98D31AF98C3066E4      0                               NaN   \n",
       "2  04C6C2175852CDBBC23B2446C7E7C22D      0  DDFFB4C01DB85921C3580F614575AA6D   \n",
       "3  168157826315514C120494D4DF8E6216      0                               NaN   \n",
       "4  B3E3673782A69D9D8A45D3B222F0B073      5                               NaN   \n",
       "\n",
       "                            domains  tweet_type  language   timestamp  \\\n",
       "0                               NaN           2        11  1581262691   \n",
       "1                               NaN           1        11  1581497241   \n",
       "2  BE4539C53C53FFABCFD232DB100C792B           2        11  1580978528   \n",
       "3                               NaN           1        54  1581321849   \n",
       "4                               NaN           2        11  1580956787   \n",
       "\n",
       "                          a_user_id  a_follower_count  ...  b_follower_count  \\\n",
       "0  D557B03872EF8986F7F4426AE094B2FE               986  ...                94   \n",
       "1  424822AC982CE0E8965506C63B44EC12              1225  ...              1139   \n",
       "2  1EC14E26417AA926095530AC591BA9CE              3016  ...               780   \n",
       "3  9B9595B6FEB8948BDDF0D222F27E0118              2121  ...                 1   \n",
       "4  525DC99B7CB8F1AC4AD3E66C53FA38E0            813505  ...               171   \n",
       "\n",
       "   b_following_count  b_is_verified b_account_creation  b_follows_a  reply  \\\n",
       "0                648          False         1478011810        False      0   \n",
       "1                 46          False         1540395738         True      0   \n",
       "2                440          False         1432084055         True      0   \n",
       "3                 45          False         1534313747        False      0   \n",
       "4                388          False         1490166885        False      0   \n",
       "\n",
       "      retweet  retweet_comment        like  id  \n",
       "0           0                0           0   0  \n",
       "1  1581497559                0  1581497622   1  \n",
       "2           0                0  1581060554   2  \n",
       "3           0                0  1581328518   3  \n",
       "4           0                0  1580957807   4  \n",
       "\n",
       "[5 rows x 24 columns]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Check the encoding: media, tweet_type and language should now show small integer codes.\n",
    "df.head( n=5 )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gc.collect()\n",
    "# Factorize a_user_id and b_user_id together so that both columns share one vocabulary:\n",
    "# the same user string gets the same integer code in either column.\n",
    "n_rows = len( df )\n",
    "uid = pd.factorize( np.concatenate( ( df['a_user_id'].values, df['b_user_id'].values ) ) )[0]\n",
    "gc.collect()\n",
    "# factorize marks missing ids with -1, which the unsigned cast below would silently wrap\n",
    "# around to a huge valid-looking code; fail loudly instead.\n",
    "assert uid.size == 0 or uid.min() >= 0, 'missing user ids cannot be encoded as uint32'\n",
    "uid = uid.astype( np.uint32 )\n",
    "gc.collect()\n",
    "\n",
    "# The first n_rows codes belong to a_user_id, the remaining n_rows to b_user_id.\n",
    "df['a_user_id'] = uid[:n_rows]\n",
    "gc.collect()\n",
    "df['b_user_id'] = uid[n_rows:]\n",
    "gc.collect()\n",
    "del uid, n_rows\n",
    "gc.collect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Swap the string tweet ids for compact integer codes, numbered in order of first appearance.\n",
    "tweet_codes = pd.factorize( df['tweet_id'] )[0]\n",
    "df['tweet_id'] = tweet_codes.astype( np.uint32 )\n",
    "del tweet_codes\n",
    "gc.collect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>hashtags</th>\n",
       "      <th>tweet_id</th>\n",
       "      <th>media</th>\n",
       "      <th>links</th>\n",
       "      <th>domains</th>\n",
       "      <th>tweet_type</th>\n",
       "      <th>language</th>\n",
       "      <th>timestamp</th>\n",
       "      <th>a_user_id</th>\n",
       "      <th>a_follower_count</th>\n",
       "      <th>...</th>\n",
       "      <th>b_follower_count</th>\n",
       "      <th>b_following_count</th>\n",
       "      <th>b_is_verified</th>\n",
       "      <th>b_account_creation</th>\n",
       "      <th>b_follows_a</th>\n",
       "      <th>reply</th>\n",
       "      <th>retweet</th>\n",
       "      <th>retweet_comment</th>\n",
       "      <th>like</th>\n",
       "      <th>id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>11</td>\n",
       "      <td>1581262691</td>\n",
       "      <td>0</td>\n",
       "      <td>986</td>\n",
       "      <td>...</td>\n",
       "      <td>94</td>\n",
       "      <td>648</td>\n",
       "      <td>False</td>\n",
       "      <td>1478011810</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>83D6C79F5FCEC8D1CAD9E82C2C261611\\tFFAD2DCF664C...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>11</td>\n",
       "      <td>1581497241</td>\n",
       "      <td>1</td>\n",
       "      <td>1225</td>\n",
       "      <td>...</td>\n",
       "      <td>1139</td>\n",
       "      <td>46</td>\n",
       "      <td>False</td>\n",
       "      <td>1540395738</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>1581497559</td>\n",
       "      <td>0</td>\n",
       "      <td>1581497622</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>DDFFB4C01DB85921C3580F614575AA6D</td>\n",
       "      <td>BE4539C53C53FFABCFD232DB100C792B</td>\n",
       "      <td>2</td>\n",
       "      <td>11</td>\n",
       "      <td>1580978528</td>\n",
       "      <td>2</td>\n",
       "      <td>3016</td>\n",
       "      <td>...</td>\n",
       "      <td>780</td>\n",
       "      <td>440</td>\n",
       "      <td>False</td>\n",
       "      <td>1432084055</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1581060554</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>54</td>\n",
       "      <td>1581321849</td>\n",
       "      <td>3</td>\n",
       "      <td>2121</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>45</td>\n",
       "      <td>False</td>\n",
       "      <td>1534313747</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1581328518</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>11</td>\n",
       "      <td>1580956787</td>\n",
       "      <td>4</td>\n",
       "      <td>813505</td>\n",
       "      <td>...</td>\n",
       "      <td>171</td>\n",
       "      <td>388</td>\n",
       "      <td>False</td>\n",
       "      <td>1490166885</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1580957807</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                            hashtags  tweet_id  media  \\\n",
       "0                                                NaN         0      0   \n",
       "1  83D6C79F5FCEC8D1CAD9E82C2C261611\\tFFAD2DCF664C...         1      0   \n",
       "2                                                NaN         2      0   \n",
       "3                                                NaN         3      0   \n",
       "4                                                NaN         4      5   \n",
       "\n",
       "                              links                           domains  \\\n",
       "0                               NaN                               NaN   \n",
       "1                               NaN                               NaN   \n",
       "2  DDFFB4C01DB85921C3580F614575AA6D  BE4539C53C53FFABCFD232DB100C792B   \n",
       "3                               NaN                               NaN   \n",
       "4                               NaN                               NaN   \n",
       "\n",
       "   tweet_type  language   timestamp  a_user_id  a_follower_count  ...  \\\n",
       "0           2        11  1581262691          0               986  ...   \n",
       "1           1        11  1581497241          1              1225  ...   \n",
       "2           2        11  1580978528          2              3016  ...   \n",
       "3           1        54  1581321849          3              2121  ...   \n",
       "4           2        11  1580956787          4            813505  ...   \n",
       "\n",
       "   b_follower_count  b_following_count  b_is_verified  b_account_creation  \\\n",
       "0                94                648          False          1478011810   \n",
       "1              1139                 46          False          1540395738   \n",
       "2               780                440          False          1432084055   \n",
       "3                 1                 45          False          1534313747   \n",
       "4               171                388          False          1490166885   \n",
       "\n",
       "   b_follows_a  reply     retweet  retweet_comment        like  id  \n",
       "0        False      0           0                0           0   0  \n",
       "1         True      0  1581497559                0  1581497622   1  \n",
       "2         True      0           0                0  1581060554   2  \n",
       "3        False      0           0                0  1581328518   3  \n",
       "4        False      0           0                0  1580957807   4  \n",
       "\n",
       "[5 rows x 24 columns]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Check the result: tweet_id and a_user_id should now show integer codes instead of hash strings.\n",
    "df.head( n=5 )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "40"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Persist the encoded frame to disk as a pickle checkpoint.\n",
    "df.to_pickle( 'tmp.pkl' )\n",
    "gc.collect()\n",
    "# Restart the kernel here to release RAM."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
